{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9763241396143519, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 17.4805, "active_queue_size": 16384.0, "cl_loss": 162.1371, "doc_norm": 8.4345, "encoder_q-embeddings": 49519.6914, "encoder_q-layer.0": 61674.6797, "encoder_q-layer.1": 47259.0117, "encoder_q-layer.10": 129020.1406, "encoder_q-layer.11": 74829.5312, "encoder_q-layer.2": 53620.668, "encoder_q-layer.3": 55530.4062, "encoder_q-layer.4": 64391.8125, "encoder_q-layer.5": 73170.2344, "encoder_q-layer.6": 96417.875, "encoder_q-layer.7": 113654.1172, "encoder_q-layer.8": 140427.4844, "encoder_q-layer.9": 109299.3516, "epoch": 0.0, "inbatch_neg_score": 40.0812, "inbatch_pos_score": 48.6875, "learning_rate": 5.000000000000001e-07, "loss": 162.1371, "norm_diff": 0.422, "norm_loss": 0.0, "num_token_doc": 66.7563, "num_token_overlap": 15.8248, "num_token_query": 42.2848, "num_token_union": 68.3857, "num_word_context": 202.3315, "num_word_doc": 49.7714, "num_word_query": 31.9242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 119685.8292, "preclip_grad_norm_avg": 0.0011, "q@queue_neg_score": 40.1562, "query_norm": 8.0125, "queue_k_norm": 8.4196, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2848, "sent_len_1": 66.7563, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9963, "stdk": 0.1812, "stdq": 0.185, "stdqueue_k": 0.1805, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 18.6523, "active_queue_size": 16384.0, "cl_loss": 103.5126, "doc_norm": 8.3285, "encoder_q-embeddings": 14396.1318, "encoder_q-layer.0": 14424.1475, "encoder_q-layer.1": 18293.5625, "encoder_q-layer.10": 34097.9922, "encoder_q-layer.11": 37511.8086, "encoder_q-layer.2": 22961.6172, "encoder_q-layer.3": 22349.2656, "encoder_q-layer.4": 21688.9453, "encoder_q-layer.5": 20287.2031, "encoder_q-layer.6": 23414.5508, "encoder_q-layer.7": 25438.3359, "encoder_q-layer.8": 30336.8203, "encoder_q-layer.9": 24805.9551, "epoch": 0.0, "inbatch_neg_score": 36.3064, "inbatch_pos_score": 41.6875, "learning_rate": 1.0000000000000002e-06, "loss": 103.5126, "norm_diff": 1.1504, "norm_loss": 0.0, "num_token_doc": 66.7272, "num_token_overlap": 15.8535, "num_token_query": 42.2827, "num_token_union": 68.3686, "num_word_context": 202.0541, "num_word_doc": 49.7635, "num_word_query": 31.9487, "postclip_grad_norm": 1.0, "preclip_grad_norm": 34320.2262, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 36.2188, "query_norm": 7.1781, "queue_k_norm": 8.3493, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2827, "sent_len_1": 66.7272, "sent_len_max_0": 127.9775, "sent_len_max_1": 189.7475, "stdk": 0.1763, "stdq": 0.1358, "stdqueue_k": 0.1785, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 20.0195, "active_queue_size": 16384.0, "cl_loss": 65.2294, "doc_norm": 8.2096, "encoder_q-embeddings": 4273.9634, "encoder_q-layer.0": 3904.0378, "encoder_q-layer.1": 5380.0991, "encoder_q-layer.10": 7813.5635, "encoder_q-layer.11": 14508.8164, "encoder_q-layer.2": 5411.5474, "encoder_q-layer.3": 5048.7861, "encoder_q-layer.4": 4851.3911, "encoder_q-layer.5": 4994.9941, "encoder_q-layer.6": 5522.9546, "encoder_q-layer.7": 5697.4243, "encoder_q-layer.8": 6335.292, "encoder_q-layer.9": 4990.6631, "epoch": 0.0, "inbatch_neg_score": 34.1276, "inbatch_pos_score": 37.875, "learning_rate": 1.5e-06, "loss": 65.2294, "norm_diff": 1.3113, "norm_loss": 0.0, "num_token_doc": 66.8151, "num_token_overlap": 15.7789, "num_token_query": 42.1378, "num_token_union": 68.4539, "num_word_context": 202.2068, "num_word_doc": 49.8677, "num_word_query": 31.7932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10484.5437, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 34.1562, "query_norm": 6.8983, "queue_k_norm": 8.2075, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1378, "sent_len_1": 66.8151, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1975, "stdk": 0.1731, "stdq": 0.1132, "stdqueue_k": 0.173, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 22.6562, "active_queue_size": 16384.0, "cl_loss": 46.7296, "doc_norm": 8.0461, "encoder_q-embeddings": 3980.2368, "encoder_q-layer.0": 4557.5957, "encoder_q-layer.1": 5618.3838, "encoder_q-layer.10": 9490.1602, "encoder_q-layer.11": 15555.9766, "encoder_q-layer.2": 5277.4443, "encoder_q-layer.3": 4348.4819, "encoder_q-layer.4": 4657.1196, "encoder_q-layer.5": 5059.0596, "encoder_q-layer.6": 5547.3667, "encoder_q-layer.7": 6005.5205, "encoder_q-layer.8": 7433.4614, "encoder_q-layer.9": 6135.7646, "epoch": 0.0, "inbatch_neg_score": 31.7117, "inbatch_pos_score": 34.4062, "learning_rate": 2.0000000000000003e-06, "loss": 46.7296, "norm_diff": 1.2335, "norm_loss": 0.0, "num_token_doc": 66.868, "num_token_overlap": 15.8811, "num_token_query": 42.3616, "num_token_union": 68.4665, "num_word_context": 202.1151, "num_word_doc": 49.8523, "num_word_query": 32.0198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10641.3366, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 31.6875, "query_norm": 6.8126, "queue_k_norm": 8.0502, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3616, "sent_len_1": 66.868, "sent_len_max_0": 128.0, "sent_len_max_1": 191.7713, "stdk": 0.167, "stdq": 0.1007, "stdqueue_k": 0.1681, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 21.0938, "active_queue_size": 16384.0, "cl_loss": 35.4612, "doc_norm": 7.8587, "encoder_q-embeddings": 3143.1772, "encoder_q-layer.0": 2677.6577, "encoder_q-layer.1": 3004.3604, "encoder_q-layer.10": 7952.7261, "encoder_q-layer.11": 11776.1133, "encoder_q-layer.2": 3284.1187, "encoder_q-layer.3": 3451.5977, "encoder_q-layer.4": 3972.0396, "encoder_q-layer.5": 4078.6033, "encoder_q-layer.6": 4313.5327, "encoder_q-layer.7": 4286.7173, "encoder_q-layer.8": 5264.4932, "encoder_q-layer.9": 4358.6021, "epoch": 0.0, "inbatch_neg_score": 29.0495, "inbatch_pos_score": 31.1562, "learning_rate": 2.5e-06, "loss": 35.4612, "norm_diff": 1.0306, "norm_loss": 0.0, "num_token_doc": 66.7352, "num_token_overlap": 15.8339, "num_token_query": 42.4401, "num_token_union": 68.5205, "num_word_context": 201.8788, "num_word_doc": 49.7935, "num_word_query": 32.0609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7674.7247, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 29.0, "query_norm": 6.8282, "queue_k_norm": 7.8836, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4401, "sent_len_1": 66.7352, "sent_len_max_0": 128.0, "sent_len_max_1": 191.655, "stdk": 0.1604, "stdq": 0.0976, "stdqueue_k": 0.1625, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 24.3164, "active_queue_size": 16384.0, "cl_loss": 29.0201, "doc_norm": 7.6836, "encoder_q-embeddings": 3640.9695, "encoder_q-layer.0": 3258.9995, "encoder_q-layer.1": 3513.4673, "encoder_q-layer.10": 5974.5581, "encoder_q-layer.11": 8432.1855, "encoder_q-layer.2": 3917.9041, "encoder_q-layer.3": 4293.2222, "encoder_q-layer.4": 4535.5063, "encoder_q-layer.5": 4886.6978, "encoder_q-layer.6": 4409.917, "encoder_q-layer.7": 4485.8477, "encoder_q-layer.8": 5075.9761, "encoder_q-layer.9": 3631.8623, "epoch": 0.01, "inbatch_neg_score": 26.2309, "inbatch_pos_score": 28.0156, "learning_rate": 3e-06, "loss": 29.0201, "norm_diff": 1.1423, "norm_loss": 0.0, "num_token_doc": 66.7459, "num_token_overlap": 15.8305, "num_token_query": 42.386, "num_token_union": 68.5231, "num_word_context": 202.5753, "num_word_doc": 49.8081, "num_word_query": 32.0275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6944.9551, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 26.1719, "query_norm": 6.5413, "queue_k_norm": 7.7057, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.386, "sent_len_1": 66.7459, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2312, "stdk": 0.1553, "stdq": 0.0924, "stdqueue_k": 0.1564, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 19.8242, "active_queue_size": 16384.0, "cl_loss": 24.8087, "doc_norm": 7.5147, "encoder_q-embeddings": 4123.1533, "encoder_q-layer.0": 3277.6694, "encoder_q-layer.1": 3996.5393, "encoder_q-layer.10": 8739.5479, "encoder_q-layer.11": 11788.9102, "encoder_q-layer.2": 4582.1289, "encoder_q-layer.3": 5160.0571, "encoder_q-layer.4": 5452.1133, "encoder_q-layer.5": 5854.791, "encoder_q-layer.6": 5522.0088, "encoder_q-layer.7": 5477.1802, "encoder_q-layer.8": 5329.875, "encoder_q-layer.9": 4306.8154, "epoch": 0.01, "inbatch_neg_score": 23.4893, "inbatch_pos_score": 24.9375, "learning_rate": 3.5000000000000004e-06, "loss": 24.8087, "norm_diff": 1.4581, "norm_loss": 0.0, "num_token_doc": 66.5024, "num_token_overlap": 15.7827, "num_token_query": 42.3758, "num_token_union": 68.3941, "num_word_context": 202.1521, "num_word_doc": 49.6746, "num_word_query": 32.0017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8689.7516, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 23.4531, "query_norm": 6.0567, "queue_k_norm": 7.5322, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3758, "sent_len_1": 66.5024, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.1625, "stdk": 0.1504, "stdq": 0.087, "stdqueue_k": 0.1509, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 23.3398, "active_queue_size": 16384.0, "cl_loss": 21.3984, "doc_norm": 7.3359, "encoder_q-embeddings": 3502.5718, "encoder_q-layer.0": 3106.7312, "encoder_q-layer.1": 3889.5867, "encoder_q-layer.10": 5499.2153, "encoder_q-layer.11": 7981.3306, "encoder_q-layer.2": 4216.4966, "encoder_q-layer.3": 4607.3071, "encoder_q-layer.4": 4782.1997, "encoder_q-layer.5": 4868.8296, "encoder_q-layer.6": 4671.9668, "encoder_q-layer.7": 4136.4976, "encoder_q-layer.8": 4322.5708, "encoder_q-layer.9": 2935.8091, "epoch": 0.01, "inbatch_neg_score": 18.7611, "inbatch_pos_score": 20.0781, "learning_rate": 4.000000000000001e-06, "loss": 21.3984, "norm_diff": 2.1299, "norm_loss": 0.0, "num_token_doc": 66.7126, "num_token_overlap": 15.7957, "num_token_query": 42.2652, "num_token_union": 68.4225, "num_word_context": 202.2671, "num_word_doc": 49.8042, "num_word_query": 31.9184, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6691.1955, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 18.7031, "query_norm": 5.206, "queue_k_norm": 7.3651, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2652, "sent_len_1": 66.7126, "sent_len_max_0": 128.0, "sent_len_max_1": 188.025, "stdk": 0.1428, "stdq": 0.0812, "stdqueue_k": 0.1454, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 22.9492, "active_queue_size": 16384.0, "cl_loss": 17.8752, "doc_norm": 7.1919, "encoder_q-embeddings": 3288.2812, "encoder_q-layer.0": 2816.8796, "encoder_q-layer.1": 3421.563, "encoder_q-layer.10": 5049.5615, "encoder_q-layer.11": 7948.6733, "encoder_q-layer.2": 3851.4895, "encoder_q-layer.3": 4420.0518, "encoder_q-layer.4": 5047.0542, "encoder_q-layer.5": 4845.5854, "encoder_q-layer.6": 4224.2124, "encoder_q-layer.7": 3666.1948, "encoder_q-layer.8": 3204.2224, "encoder_q-layer.9": 2301.9478, "epoch": 0.01, "inbatch_neg_score": 12.9007, "inbatch_pos_score": 13.9688, "learning_rate": 4.5e-06, "loss": 17.8752, "norm_diff": 3.0236, "norm_loss": 0.0, "num_token_doc": 66.8776, "num_token_overlap": 15.8768, "num_token_query": 42.4987, "num_token_union": 68.5656, "num_word_context": 202.5508, "num_word_doc": 49.9117, "num_word_query": 32.1171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6295.2394, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 12.8828, "query_norm": 4.1683, "queue_k_norm": 7.2093, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4987, "sent_len_1": 66.8776, "sent_len_max_0": 128.0, "sent_len_max_1": 188.81, "stdk": 0.1386, "stdq": 0.0776, "stdqueue_k": 0.1398, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 21.2891, "active_queue_size": 16384.0, "cl_loss": 15.033, "doc_norm": 7.0511, "encoder_q-embeddings": 8696.6533, "encoder_q-layer.0": 8959.459, "encoder_q-layer.1": 10382.6123, "encoder_q-layer.10": 5151.2236, "encoder_q-layer.11": 7533.585, "encoder_q-layer.2": 8913.0254, "encoder_q-layer.3": 8981.8369, "encoder_q-layer.4": 8797.8545, "encoder_q-layer.5": 8714.7969, "encoder_q-layer.6": 7874.4839, "encoder_q-layer.7": 5910.4009, "encoder_q-layer.8": 4371.6167, "encoder_q-layer.9": 2244.7659, "epoch": 0.01, "inbatch_neg_score": 9.0565, "inbatch_pos_score": 9.8828, "learning_rate": 5e-06, "loss": 15.033, "norm_diff": 3.7928, "norm_loss": 0.0, "num_token_doc": 66.777, "num_token_overlap": 15.8773, "num_token_query": 42.4331, "num_token_union": 68.4562, "num_word_context": 202.0964, "num_word_doc": 49.7998, "num_word_query": 32.0447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11579.3434, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 9.0547, "query_norm": 3.2583, "queue_k_norm": 7.0615, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4331, "sent_len_1": 66.777, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1438, "stdk": 0.134, "stdq": 0.0714, "stdqueue_k": 0.134, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 22.6562, "active_queue_size": 16384.0, "cl_loss": 13.085, "doc_norm": 6.9307, "encoder_q-embeddings": 8366.6836, "encoder_q-layer.0": 7634.4492, "encoder_q-layer.1": 8868.9395, "encoder_q-layer.10": 3953.0676, "encoder_q-layer.11": 6409.1392, "encoder_q-layer.2": 9324.3115, "encoder_q-layer.3": 9824.25, "encoder_q-layer.4": 11070.9883, "encoder_q-layer.5": 12265.2109, "encoder_q-layer.6": 10947.876, "encoder_q-layer.7": 10341.4092, "encoder_q-layer.8": 10319.918, "encoder_q-layer.9": 2364.0828, "epoch": 0.01, "inbatch_neg_score": 5.063, "inbatch_pos_score": 5.7812, "learning_rate": 5.500000000000001e-06, "loss": 13.085, "norm_diff": 4.3116, "norm_loss": 0.0, "num_token_doc": 66.8266, "num_token_overlap": 15.8203, "num_token_query": 42.2374, "num_token_union": 68.4831, "num_word_context": 202.2569, "num_word_doc": 49.8782, "num_word_query": 31.9118, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13134.198, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 5.0625, "query_norm": 2.6191, "queue_k_norm": 6.9274, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2374, "sent_len_1": 66.8266, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4638, "stdk": 0.1293, "stdq": 0.0654, "stdqueue_k": 0.1291, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 21.875, "active_queue_size": 16384.0, "cl_loss": 11.8946, "doc_norm": 6.802, "encoder_q-embeddings": 7753.769, "encoder_q-layer.0": 7239.3643, "encoder_q-layer.1": 8475.6406, "encoder_q-layer.10": 3397.5122, "encoder_q-layer.11": 5887.6992, "encoder_q-layer.2": 8172.1719, "encoder_q-layer.3": 8340.7148, "encoder_q-layer.4": 9140.5752, "encoder_q-layer.5": 9213.0303, "encoder_q-layer.6": 8069.6162, "encoder_q-layer.7": 6299.3999, "encoder_q-layer.8": 4713.4141, "encoder_q-layer.9": 1684.6239, "epoch": 0.01, "inbatch_neg_score": 3.9806, "inbatch_pos_score": 4.6172, "learning_rate": 6e-06, "loss": 11.8946, "norm_diff": 4.5275, "norm_loss": 0.0, "num_token_doc": 66.8029, "num_token_overlap": 15.7812, "num_token_query": 42.131, "num_token_union": 68.4381, "num_word_context": 201.7482, "num_word_doc": 49.836, "num_word_query": 31.8253, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10565.6784, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.9707, "query_norm": 2.2745, "queue_k_norm": 6.8063, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.131, "sent_len_1": 66.8029, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.9475, "stdk": 0.1231, "stdq": 0.0601, "stdqueue_k": 0.1239, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 23.1445, "active_queue_size": 16384.0, "cl_loss": 10.934, "doc_norm": 6.6844, "encoder_q-embeddings": 17001.5527, "encoder_q-layer.0": 15444.4307, "encoder_q-layer.1": 16211.8867, "encoder_q-layer.10": 3169.0784, "encoder_q-layer.11": 6005.7808, "encoder_q-layer.2": 16638.6113, "encoder_q-layer.3": 16887.1816, "encoder_q-layer.4": 18897.4551, "encoder_q-layer.5": 19470.6035, "encoder_q-layer.6": 16134.8926, "encoder_q-layer.7": 13495.0859, "encoder_q-layer.8": 13637.2256, "encoder_q-layer.9": 2456.4954, "epoch": 0.01, "inbatch_neg_score": 3.8488, "inbatch_pos_score": 4.4727, "learning_rate": 6.5000000000000004e-06, "loss": 10.934, "norm_diff": 4.5601, "norm_loss": 0.0, "num_token_doc": 66.7197, "num_token_overlap": 15.8706, "num_token_query": 42.3981, "num_token_union": 68.4291, "num_word_context": 202.6411, "num_word_doc": 49.7897, "num_word_query": 32.0333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21775.5775, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 3.8438, "query_norm": 2.1243, "queue_k_norm": 6.6971, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3981, "sent_len_1": 66.7197, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2488, "stdk": 0.119, "stdq": 0.0573, "stdqueue_k": 0.1189, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 22.6562, "active_queue_size": 16384.0, "cl_loss": 10.3327, "doc_norm": 6.5829, "encoder_q-embeddings": 6528.1538, "encoder_q-layer.0": 5986.7881, "encoder_q-layer.1": 6702.5327, "encoder_q-layer.10": 4197.6074, "encoder_q-layer.11": 6916.8584, "encoder_q-layer.2": 6799.186, "encoder_q-layer.3": 6556.6768, "encoder_q-layer.4": 7038.7544, "encoder_q-layer.5": 7538.2695, "encoder_q-layer.6": 6312.0171, "encoder_q-layer.7": 4113.1343, "encoder_q-layer.8": 3476.3127, "encoder_q-layer.9": 1775.041, "epoch": 0.01, "inbatch_neg_score": 3.1991, "inbatch_pos_score": 3.7969, "learning_rate": 7.000000000000001e-06, "loss": 10.3327, "norm_diff": 4.5455, "norm_loss": 0.0, "num_token_doc": 67.0675, "num_token_overlap": 15.9239, "num_token_query": 42.4759, "num_token_union": 68.6521, "num_word_context": 202.15, "num_word_doc": 50.087, "num_word_query": 32.081, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8674.5561, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 3.1855, "query_norm": 2.0374, "queue_k_norm": 6.5903, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4759, "sent_len_1": 67.0675, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.525, "stdk": 0.1128, "stdq": 0.0548, "stdqueue_k": 0.1141, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 24.1211, "active_queue_size": 16384.0, "cl_loss": 10.1126, "doc_norm": 6.4877, "encoder_q-embeddings": 5665.7471, "encoder_q-layer.0": 5157.6602, "encoder_q-layer.1": 5729.7378, "encoder_q-layer.10": 3170.4478, "encoder_q-layer.11": 5547.6826, "encoder_q-layer.2": 6359.8081, "encoder_q-layer.3": 6241.5107, "encoder_q-layer.4": 6719.2217, "encoder_q-layer.5": 6686.3032, "encoder_q-layer.6": 5384.7661, "encoder_q-layer.7": 3668.8184, "encoder_q-layer.8": 3319.5229, "encoder_q-layer.9": 1560.4086, "epoch": 0.01, "inbatch_neg_score": 2.562, "inbatch_pos_score": 3.1406, "learning_rate": 7.5e-06, "loss": 10.1126, "norm_diff": 4.5152, "norm_loss": 0.0, "num_token_doc": 66.6088, "num_token_overlap": 15.7875, "num_token_query": 42.2693, "num_token_union": 68.4198, "num_word_context": 202.2152, "num_word_doc": 49.664, "num_word_query": 31.8921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7666.2317, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.5508, "query_norm": 1.9725, "queue_k_norm": 6.4996, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2693, "sent_len_1": 66.6088, "sent_len_max_0": 128.0, "sent_len_max_1": 190.595, "stdk": 0.1088, "stdq": 0.053, "stdqueue_k": 0.1101, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 23.8281, "active_queue_size": 16384.0, "cl_loss": 9.8256, "doc_norm": 6.4021, "encoder_q-embeddings": 3402.8938, "encoder_q-layer.0": 3293.9346, "encoder_q-layer.1": 3722.457, "encoder_q-layer.10": 4305.9399, "encoder_q-layer.11": 6565.5146, "encoder_q-layer.2": 3796.8992, "encoder_q-layer.3": 3883.5696, "encoder_q-layer.4": 4213.8574, "encoder_q-layer.5": 4457.7407, "encoder_q-layer.6": 3594.5012, "encoder_q-layer.7": 2690.3506, "encoder_q-layer.8": 3106.8232, "encoder_q-layer.9": 2084.052, "epoch": 0.02, "inbatch_neg_score": 2.1373, "inbatch_pos_score": 2.7441, "learning_rate": 8.000000000000001e-06, "loss": 9.8256, "norm_diff": 4.4042, "norm_loss": 0.0, "num_token_doc": 66.5728, "num_token_overlap": 15.8094, "num_token_query": 42.2192, "num_token_union": 68.3496, "num_word_context": 201.8937, "num_word_doc": 49.7037, "num_word_query": 31.9144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5511.6687, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.1289, "query_norm": 1.9979, "queue_k_norm": 6.4019, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2192, "sent_len_1": 66.5728, "sent_len_max_0": 128.0, "sent_len_max_1": 188.745, "stdk": 0.105, "stdq": 0.0555, "stdqueue_k": 0.1055, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 25.3906, "active_queue_size": 16384.0, "cl_loss": 9.72, "doc_norm": 6.2945, "encoder_q-embeddings": 5612.9263, "encoder_q-layer.0": 5964.3081, "encoder_q-layer.1": 5951.1836, "encoder_q-layer.10": 2735.8794, "encoder_q-layer.11": 5344.0, "encoder_q-layer.2": 6376.208, "encoder_q-layer.3": 6748.5835, "encoder_q-layer.4": 7406.9902, "encoder_q-layer.5": 8354.9102, "encoder_q-layer.6": 7724.2305, "encoder_q-layer.7": 6634.7461, "encoder_q-layer.8": 7103.3208, "encoder_q-layer.9": 1834.5883, "epoch": 0.02, "inbatch_neg_score": 2.0757, "inbatch_pos_score": 2.6699, "learning_rate": 8.500000000000002e-06, "loss": 9.72, "norm_diff": 4.2712, "norm_loss": 0.0, "num_token_doc": 66.5537, "num_token_overlap": 15.7835, "num_token_query": 42.1792, "num_token_union": 68.252, "num_word_context": 201.7533, "num_word_doc": 49.6678, "num_word_query": 31.8411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9031.8511, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.0723, "query_norm": 2.0233, "queue_k_norm": 6.3096, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1792, "sent_len_1": 66.5537, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.59, "stdk": 0.1005, "stdq": 0.0541, "stdqueue_k": 0.1013, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 24.1211, "active_queue_size": 16384.0, "cl_loss": 9.5706, "doc_norm": 6.1909, "encoder_q-embeddings": 6619.4014, "encoder_q-layer.0": 5930.7163, "encoder_q-layer.1": 6941.9258, "encoder_q-layer.10": 3310.4705, "encoder_q-layer.11": 5562.709, "encoder_q-layer.2": 7682.252, "encoder_q-layer.3": 7457.748, "encoder_q-layer.4": 7690.5059, "encoder_q-layer.5": 7558.5713, "encoder_q-layer.6": 5233.8857, "encoder_q-layer.7": 3422.4136, "encoder_q-layer.8": 3470.1279, "encoder_q-layer.9": 1977.1383, "epoch": 0.02, "inbatch_neg_score": 1.3637, "inbatch_pos_score": 1.915, "learning_rate": 9e-06, "loss": 9.5706, "norm_diff": 4.1807, "norm_loss": 0.0, "num_token_doc": 66.8627, "num_token_overlap": 15.8372, "num_token_query": 42.3792, "num_token_union": 68.5976, "num_word_context": 202.5795, "num_word_doc": 49.9029, "num_word_query": 31.9676, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8757.605, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.3662, "query_norm": 2.0103, "queue_k_norm": 6.2074, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3792, "sent_len_1": 66.8627, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.7388, "stdk": 0.0971, "stdq": 0.054, "stdqueue_k": 0.0972, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 24.6094, "active_queue_size": 16384.0, "cl_loss": 9.3271, "doc_norm": 6.0636, "encoder_q-embeddings": 3865.1011, "encoder_q-layer.0": 3178.3203, "encoder_q-layer.1": 3763.8052, "encoder_q-layer.10": 2601.5454, "encoder_q-layer.11": 5321.499, "encoder_q-layer.2": 4228.8447, "encoder_q-layer.3": 4388.4492, "encoder_q-layer.4": 4666.5498, "encoder_q-layer.5": 4657.9932, "encoder_q-layer.6": 4362.1895, "encoder_q-layer.7": 3750.0007, "encoder_q-layer.8": 3050.3018, "encoder_q-layer.9": 1559.6521, "epoch": 0.02, "inbatch_neg_score": 1.4128, "inbatch_pos_score": 1.96, "learning_rate": 9.5e-06, "loss": 9.3271, "norm_diff": 4.0129, "norm_loss": 0.0, "num_token_doc": 66.5972, "num_token_overlap": 15.7977, "num_token_query": 42.3303, "num_token_union": 68.4585, "num_word_context": 202.3052, "num_word_doc": 49.6955, "num_word_query": 31.9828, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5710.3843, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.4102, "query_norm": 2.0507, "queue_k_norm": 6.0955, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3303, "sent_len_1": 66.5972, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.36, "stdk": 0.0926, "stdq": 0.0568, "stdqueue_k": 0.0935, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 24.5117, "active_queue_size": 16384.0, "cl_loss": 9.2308, "doc_norm": 5.956, "encoder_q-embeddings": 4174.3237, "encoder_q-layer.0": 3464.2119, "encoder_q-layer.1": 4103.0454, "encoder_q-layer.10": 5831.8672, "encoder_q-layer.11": 6724.7061, "encoder_q-layer.2": 4710.251, "encoder_q-layer.3": 4809.9072, "encoder_q-layer.4": 5081.2725, "encoder_q-layer.5": 5908.1367, "encoder_q-layer.6": 5694.3809, "encoder_q-layer.7": 5433.0464, "encoder_q-layer.8": 5840.6113, "encoder_q-layer.9": 3802.3901, "epoch": 0.02, "inbatch_neg_score": 1.2653, "inbatch_pos_score": 1.8389, "learning_rate": 1e-05, "loss": 9.2308, "norm_diff": 3.8993, "norm_loss": 0.0, "num_token_doc": 66.6548, "num_token_overlap": 15.8453, "num_token_query": 42.3372, "num_token_union": 68.3725, "num_word_context": 202.119, "num_word_doc": 49.7585, "num_word_query": 31.9817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7105.5048, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.251, "query_norm": 2.0567, "queue_k_norm": 5.9626, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3372, "sent_len_1": 66.6548, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.0375, "stdk": 0.0883, "stdq": 0.0568, "stdqueue_k": 0.09, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 21.6797, "active_queue_size": 16384.0, "cl_loss": 9.2038, "doc_norm": 5.8019, "encoder_q-embeddings": 18126.4336, "encoder_q-layer.0": 14686.8477, "encoder_q-layer.1": 14112.1641, "encoder_q-layer.10": 3876.457, "encoder_q-layer.11": 5103.8945, "encoder_q-layer.2": 14658.3076, "encoder_q-layer.3": 11929.0625, "encoder_q-layer.4": 12043.8682, "encoder_q-layer.5": 10346.5527, "encoder_q-layer.6": 7112.2183, "encoder_q-layer.7": 4990.1104, "encoder_q-layer.8": 4355.0488, "encoder_q-layer.9": 2591.0759, "epoch": 0.02, "inbatch_neg_score": 1.5612, "inbatch_pos_score": 2.0957, "learning_rate": 1.05e-05, "loss": 9.2038, "norm_diff": 3.585, "norm_loss": 0.0, "num_token_doc": 66.701, "num_token_overlap": 15.7157, "num_token_query": 42.0527, "num_token_union": 68.3626, "num_word_context": 202.367, "num_word_doc": 49.8295, "num_word_query": 31.7498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16652.5353, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.5527, "query_norm": 2.2169, "queue_k_norm": 5.8101, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.0527, "sent_len_1": 66.701, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9325, "stdk": 0.0868, "stdq": 0.0607, "stdqueue_k": 0.0867, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 25.3906, "active_queue_size": 16384.0, "cl_loss": 8.8044, "doc_norm": 5.6159, "encoder_q-embeddings": 2721.4265, "encoder_q-layer.0": 2234.532, "encoder_q-layer.1": 2465.4438, "encoder_q-layer.10": 1897.6084, "encoder_q-layer.11": 3731.593, "encoder_q-layer.2": 2828.9551, "encoder_q-layer.3": 3011.8521, "encoder_q-layer.4": 3409.3999, "encoder_q-layer.5": 4029.5547, "encoder_q-layer.6": 3800.5105, "encoder_q-layer.7": 3141.4976, "encoder_q-layer.8": 2543.8247, "encoder_q-layer.9": 1590.6375, "epoch": 0.02, "inbatch_neg_score": 1.7478, "inbatch_pos_score": 2.2871, "learning_rate": 1.1000000000000001e-05, "loss": 8.8044, "norm_diff": 3.3972, "norm_loss": 0.0, "num_token_doc": 66.5805, "num_token_overlap": 15.8793, "num_token_query": 42.3408, "num_token_union": 68.3309, "num_word_context": 201.8809, "num_word_doc": 49.675, "num_word_query": 31.9943, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4246.4297, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.7344, "query_norm": 2.2188, "queue_k_norm": 5.632, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3408, "sent_len_1": 66.5805, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.8587, "stdk": 0.0837, "stdq": 0.0619, "stdqueue_k": 0.0836, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 25.5859, "active_queue_size": 16384.0, "cl_loss": 8.671, "doc_norm": 5.407, "encoder_q-embeddings": 15580.3633, "encoder_q-layer.0": 14052.4014, "encoder_q-layer.1": 14718.1787, "encoder_q-layer.10": 29141.4746, "encoder_q-layer.11": 26256.2617, "encoder_q-layer.2": 14740.2734, "encoder_q-layer.3": 15069.7002, "encoder_q-layer.4": 14647.4043, "encoder_q-layer.5": 15463.5742, "encoder_q-layer.6": 15903.1572, "encoder_q-layer.7": 16292.9385, "encoder_q-layer.8": 18920.209, "encoder_q-layer.9": 19743.8223, "epoch": 0.02, "inbatch_neg_score": 1.4934, "inbatch_pos_score": 2.0312, "learning_rate": 1.1500000000000002e-05, "loss": 8.671, "norm_diff": 3.2343, "norm_loss": 0.0, "num_token_doc": 66.6916, "num_token_overlap": 15.7801, "num_token_query": 42.3644, "num_token_union": 68.5239, "num_word_context": 202.026, "num_word_doc": 49.7609, "num_word_query": 31.9902, "postclip_grad_norm": 1.0, "preclip_grad_norm": 24780.2075, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 1.4834, "query_norm": 2.1727, "queue_k_norm": 5.4275, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3644, "sent_len_1": 66.6916, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.0488, "stdk": 0.0798, "stdq": 0.0618, "stdqueue_k": 0.0808, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 26.3672, "active_queue_size": 16384.0, "cl_loss": 8.2545, "doc_norm": 5.1973, "encoder_q-embeddings": 7614.0068, "encoder_q-layer.0": 6228.7412, "encoder_q-layer.1": 6485.9482, "encoder_q-layer.10": 19260.3184, "encoder_q-layer.11": 17607.3594, "encoder_q-layer.2": 5167.5688, "encoder_q-layer.3": 4866.8096, "encoder_q-layer.4": 5399.106, "encoder_q-layer.5": 6323.167, "encoder_q-layer.6": 7883.2729, "encoder_q-layer.7": 8706.0146, "encoder_q-layer.8": 10340.5605, "encoder_q-layer.9": 12604.6621, "epoch": 0.02, "inbatch_neg_score": 1.2903, "inbatch_pos_score": 1.8252, "learning_rate": 1.2e-05, "loss": 8.2545, "norm_diff": 3.0191, "norm_loss": 0.0, "num_token_doc": 66.8803, "num_token_overlap": 15.8336, "num_token_query": 42.3281, "num_token_union": 68.4939, "num_word_context": 202.3044, "num_word_doc": 49.8678, "num_word_query": 31.9818, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13370.5572, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2793, "query_norm": 2.1782, "queue_k_norm": 5.2184, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3281, "sent_len_1": 66.8803, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5563, "stdk": 0.0777, "stdq": 0.0628, "stdqueue_k": 0.0782, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 25.4883, "active_queue_size": 16384.0, "cl_loss": 7.9838, "doc_norm": 4.9758, "encoder_q-embeddings": 5139.0146, "encoder_q-layer.0": 4756.7949, "encoder_q-layer.1": 5148.9893, "encoder_q-layer.10": 22232.0234, "encoder_q-layer.11": 18242.0762, "encoder_q-layer.2": 5828.5361, "encoder_q-layer.3": 5764.0474, "encoder_q-layer.4": 6276.1431, "encoder_q-layer.5": 7089.1899, "encoder_q-layer.6": 9407.7871, "encoder_q-layer.7": 11464.3232, "encoder_q-layer.8": 13799.7959, "encoder_q-layer.9": 16687.082, "epoch": 0.02, "inbatch_neg_score": 1.2294, "inbatch_pos_score": 1.7285, "learning_rate": 1.25e-05, "loss": 7.9838, "norm_diff": 2.8228, "norm_loss": 0.0, "num_token_doc": 66.7698, "num_token_overlap": 15.8249, "num_token_query": 42.4886, "num_token_union": 68.5924, "num_word_context": 202.3481, "num_word_doc": 49.7896, "num_word_query": 32.0971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14909.8488, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 1.2197, "query_norm": 2.153, "queue_k_norm": 4.9887, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4886, "sent_len_1": 66.7698, "sent_len_max_0": 128.0, "sent_len_max_1": 190.375, "stdk": 0.0755, "stdq": 0.0628, "stdqueue_k": 0.0758, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 25.3906, "active_queue_size": 16384.0, "cl_loss": 7.7785, "doc_norm": 4.7366, "encoder_q-embeddings": 2872.9883, "encoder_q-layer.0": 2645.6201, "encoder_q-layer.1": 3026.5098, "encoder_q-layer.10": 21064.5078, "encoder_q-layer.11": 17834.8906, "encoder_q-layer.2": 3948.29, "encoder_q-layer.3": 4190.6265, "encoder_q-layer.4": 5382.8647, "encoder_q-layer.5": 7206.999, "encoder_q-layer.6": 9584.6055, "encoder_q-layer.7": 11061.2412, "encoder_q-layer.8": 11611.1875, "encoder_q-layer.9": 14045.8418, "epoch": 0.03, "inbatch_neg_score": 0.8995, "inbatch_pos_score": 1.4072, "learning_rate": 1.3000000000000001e-05, "loss": 7.7785, "norm_diff": 2.5853, "norm_loss": 0.0, "num_token_doc": 66.6715, "num_token_overlap": 15.8571, "num_token_query": 42.2736, "num_token_union": 68.3876, "num_word_context": 202.3524, "num_word_doc": 49.7764, "num_word_query": 31.9162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13509.597, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8965, "query_norm": 2.1513, "queue_k_norm": 4.7659, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2736, "sent_len_1": 66.6715, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.7738, "stdk": 0.0737, "stdq": 0.0617, "stdqueue_k": 0.0736, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 28.418, "active_queue_size": 16384.0, "cl_loss": 7.5714, "doc_norm": 4.5326, "encoder_q-embeddings": 3803.3103, "encoder_q-layer.0": 3067.7063, "encoder_q-layer.1": 4259.8096, "encoder_q-layer.10": 47322.6328, "encoder_q-layer.11": 35100.0859, "encoder_q-layer.2": 5713.7085, "encoder_q-layer.3": 7396.375, "encoder_q-layer.4": 10375.082, "encoder_q-layer.5": 14185.1807, "encoder_q-layer.6": 18957.1992, "encoder_q-layer.7": 23139.6895, "encoder_q-layer.8": 27079.6426, "encoder_q-layer.9": 34142.1055, "epoch": 0.03, "inbatch_neg_score": 1.2271, "inbatch_pos_score": 1.7773, "learning_rate": 1.3500000000000001e-05, "loss": 7.5714, "norm_diff": 2.3927, "norm_loss": 0.0, "num_token_doc": 66.7982, "num_token_overlap": 15.8761, "num_token_query": 42.3972, "num_token_union": 68.486, "num_word_context": 202.5186, "num_word_doc": 49.8307, "num_word_query": 32.0356, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28740.2477, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 1.2129, "query_norm": 2.1399, "queue_k_norm": 4.5405, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3972, "sent_len_1": 66.7982, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1587, "stdk": 0.0707, "stdq": 0.0622, "stdqueue_k": 0.0719, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 25.9766, "active_queue_size": 16384.0, "cl_loss": 7.4912, "doc_norm": 4.3041, "encoder_q-embeddings": 2900.7256, "encoder_q-layer.0": 2439.4177, "encoder_q-layer.1": 2566.386, "encoder_q-layer.10": 14719.2812, "encoder_q-layer.11": 11212.0996, "encoder_q-layer.2": 2949.3865, "encoder_q-layer.3": 3348.4749, "encoder_q-layer.4": 4473.979, "encoder_q-layer.5": 5735.4404, "encoder_q-layer.6": 7584.0176, "encoder_q-layer.7": 9097.8223, "encoder_q-layer.8": 10030.5527, "encoder_q-layer.9": 11532.6807, "epoch": 0.03, "inbatch_neg_score": 0.6098, "inbatch_pos_score": 1.1055, "learning_rate": 1.4000000000000001e-05, "loss": 7.4912, "norm_diff": 2.1932, "norm_loss": 0.0, "num_token_doc": 66.7958, "num_token_overlap": 15.8042, "num_token_query": 42.219, "num_token_union": 68.446, "num_word_context": 202.0651, "num_word_doc": 49.8596, "num_word_query": 31.8535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10207.8348, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6016, "query_norm": 2.111, "queue_k_norm": 4.3385, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.219, "sent_len_1": 66.7958, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3063, "stdk": 0.0702, "stdq": 0.0617, "stdqueue_k": 0.0702, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 28.125, "active_queue_size": 16384.0, "cl_loss": 7.2093, "doc_norm": 4.0938, "encoder_q-embeddings": 3509.1541, "encoder_q-layer.0": 3040.7415, "encoder_q-layer.1": 3366.6775, "encoder_q-layer.10": 5353.0576, "encoder_q-layer.11": 5574.564, "encoder_q-layer.2": 3845.5298, "encoder_q-layer.3": 3375.9038, "encoder_q-layer.4": 3261.3606, "encoder_q-layer.5": 3388.8079, "encoder_q-layer.6": 3472.2058, "encoder_q-layer.7": 3539.0029, "encoder_q-layer.8": 4002.3108, "encoder_q-layer.9": 4574.7915, "epoch": 0.03, "inbatch_neg_score": 0.5587, "inbatch_pos_score": 1.0723, "learning_rate": 1.45e-05, "loss": 7.2093, "norm_diff": 2.0143, "norm_loss": 0.0, "num_token_doc": 66.8577, "num_token_overlap": 15.821, "num_token_query": 42.3311, "num_token_union": 68.5001, "num_word_context": 201.9603, "num_word_doc": 49.8792, "num_word_query": 31.9868, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5455.2247, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5518, "query_norm": 2.0796, "queue_k_norm": 4.1264, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3311, "sent_len_1": 66.8577, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3288, "stdk": 0.0685, "stdq": 0.0616, "stdqueue_k": 0.0687, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 26.2695, "active_queue_size": 16384.0, "cl_loss": 7.0423, "doc_norm": 3.9455, "encoder_q-embeddings": 5681.9663, "encoder_q-layer.0": 5192.5049, "encoder_q-layer.1": 5676.1104, "encoder_q-layer.10": 19121.8574, "encoder_q-layer.11": 13235.3945, "encoder_q-layer.2": 6109.6528, "encoder_q-layer.3": 5521.6509, "encoder_q-layer.4": 5652.6221, "encoder_q-layer.5": 6549.1548, "encoder_q-layer.6": 7993.7144, "encoder_q-layer.7": 10161.8193, "encoder_q-layer.8": 12002.7314, "encoder_q-layer.9": 14373.3242, "epoch": 0.03, "inbatch_neg_score": 0.5411, "inbatch_pos_score": 1.041, "learning_rate": 1.5e-05, "loss": 7.0423, "norm_diff": 1.8894, "norm_loss": 0.0, "num_token_doc": 66.6056, "num_token_overlap": 15.8071, "num_token_query": 42.2916, "num_token_union": 68.408, "num_word_context": 202.4115, "num_word_doc": 49.7574, "num_word_query": 31.965, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13105.8859, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5337, "query_norm": 2.0561, "queue_k_norm": 3.9489, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2916, "sent_len_1": 66.6056, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.7363, "stdk": 0.067, "stdq": 0.0606, "stdqueue_k": 0.0675, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 29.2969, "active_queue_size": 16384.0, "cl_loss": 6.8306, "doc_norm": 3.7766, "encoder_q-embeddings": 1498.2433, "encoder_q-layer.0": 1288.5736, "encoder_q-layer.1": 1510.6174, "encoder_q-layer.10": 16780.0371, "encoder_q-layer.11": 12071.4707, "encoder_q-layer.2": 1960.2645, "encoder_q-layer.3": 2258.9663, "encoder_q-layer.4": 2940.4341, "encoder_q-layer.5": 4026.6167, "encoder_q-layer.6": 5330.6655, "encoder_q-layer.7": 6867.9121, "encoder_q-layer.8": 8853.459, "encoder_q-layer.9": 11557.3086, "epoch": 0.03, "inbatch_neg_score": 0.5532, "inbatch_pos_score": 1.0596, "learning_rate": 1.55e-05, "loss": 6.8306, "norm_diff": 1.7205, "norm_loss": 0.0, "num_token_doc": 66.6193, "num_token_overlap": 15.7434, "num_token_query": 42.1812, "num_token_union": 68.3983, "num_word_context": 202.4798, "num_word_doc": 49.7435, "num_word_query": 31.86, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9634.539, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5459, "query_norm": 2.0561, "queue_k_norm": 3.7936, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1812, "sent_len_1": 66.6193, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.0625, "stdk": 0.066, "stdq": 0.0595, "stdqueue_k": 0.0662, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 32.6172, "active_queue_size": 16384.0, "cl_loss": 6.6644, "doc_norm": 3.6439, "encoder_q-embeddings": 1148.0004, "encoder_q-layer.0": 967.4291, "encoder_q-layer.1": 1059.4231, "encoder_q-layer.10": 5840.3501, "encoder_q-layer.11": 5816.3481, "encoder_q-layer.2": 1134.5563, "encoder_q-layer.3": 1174.9821, "encoder_q-layer.4": 1481.1414, "encoder_q-layer.5": 1847.0687, "encoder_q-layer.6": 2302.1963, "encoder_q-layer.7": 2597.5688, "encoder_q-layer.8": 3339.0068, "encoder_q-layer.9": 3918.6741, "epoch": 0.03, "inbatch_neg_score": 0.6011, "inbatch_pos_score": 1.1299, "learning_rate": 1.6000000000000003e-05, "loss": 6.6644, "norm_diff": 1.6168, "norm_loss": 0.0, "num_token_doc": 66.6687, "num_token_overlap": 15.7825, "num_token_query": 42.2661, "num_token_union": 68.4377, "num_word_context": 202.2039, "num_word_doc": 49.7638, "num_word_query": 31.9425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3916.0064, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5933, "query_norm": 2.0272, "queue_k_norm": 3.6414, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2661, "sent_len_1": 66.6687, "sent_len_max_0": 127.9862, "sent_len_max_1": 187.19, "stdk": 0.0648, "stdq": 0.0584, "stdqueue_k": 0.0655, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 30.6641, "active_queue_size": 16384.0, "cl_loss": 6.3966, "doc_norm": 3.5161, "encoder_q-embeddings": 1516.4938, "encoder_q-layer.0": 1361.8308, "encoder_q-layer.1": 1654.6459, "encoder_q-layer.10": 26029.2617, "encoder_q-layer.11": 18410.791, "encoder_q-layer.2": 2194.2249, "encoder_q-layer.3": 2728.9495, "encoder_q-layer.4": 3747.071, "encoder_q-layer.5": 5266.2559, "encoder_q-layer.6": 7054.7231, "encoder_q-layer.7": 9560.1396, "encoder_q-layer.8": 11914.9092, "encoder_q-layer.9": 17025.9023, "epoch": 0.03, "inbatch_neg_score": 0.7677, "inbatch_pos_score": 1.3018, "learning_rate": 1.65e-05, "loss": 6.3966, "norm_diff": 1.5372, "norm_loss": 0.0, "num_token_doc": 66.9942, "num_token_overlap": 15.8158, "num_token_query": 42.2986, "num_token_union": 68.5721, "num_word_context": 202.6073, "num_word_doc": 49.9887, "num_word_query": 31.9569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14164.0804, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7627, "query_norm": 1.979, "queue_k_norm": 3.5138, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2986, "sent_len_1": 66.9942, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1475, "stdk": 0.0642, "stdq": 0.0584, "stdqueue_k": 0.0644, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 31.8359, "active_queue_size": 16384.0, "cl_loss": 6.4446, "doc_norm": 3.3905, "encoder_q-embeddings": 2133.9246, "encoder_q-layer.0": 1976.0402, "encoder_q-layer.1": 2029.2427, "encoder_q-layer.10": 17531.207, "encoder_q-layer.11": 12718.4717, "encoder_q-layer.2": 2276.9863, "encoder_q-layer.3": 2487.7209, "encoder_q-layer.4": 2975.9006, "encoder_q-layer.5": 3947.2458, "encoder_q-layer.6": 5279.2241, "encoder_q-layer.7": 7076.5205, "encoder_q-layer.8": 8787.0039, "encoder_q-layer.9": 12217.2715, "epoch": 0.03, "inbatch_neg_score": 0.7368, "inbatch_pos_score": 1.2656, "learning_rate": 1.7000000000000003e-05, "loss": 6.4446, "norm_diff": 1.4092, "norm_loss": 0.0, "num_token_doc": 66.8782, "num_token_overlap": 15.7976, "num_token_query": 42.4492, "num_token_union": 68.6331, "num_word_context": 202.6341, "num_word_doc": 49.8748, "num_word_query": 32.0926, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10092.5092, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7285, "query_norm": 1.9813, "queue_k_norm": 3.3949, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4492, "sent_len_1": 66.8782, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.3175, "stdk": 0.0635, "stdq": 0.0568, "stdqueue_k": 0.0637, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 31.1523, "active_queue_size": 16384.0, "cl_loss": 6.3329, "doc_norm": 3.279, "encoder_q-embeddings": 2760.6138, "encoder_q-layer.0": 2363.959, "encoder_q-layer.1": 2528.0657, "encoder_q-layer.10": 18241.2793, "encoder_q-layer.11": 12698.6885, "encoder_q-layer.2": 2972.8037, "encoder_q-layer.3": 3178.2798, "encoder_q-layer.4": 3663.7156, "encoder_q-layer.5": 4694.9966, "encoder_q-layer.6": 5835.6128, "encoder_q-layer.7": 7056.5869, "encoder_q-layer.8": 8442.9404, "encoder_q-layer.9": 11898.7061, "epoch": 0.03, "inbatch_neg_score": 0.4797, "inbatch_pos_score": 0.9995, "learning_rate": 1.75e-05, "loss": 6.3329, "norm_diff": 1.2942, "norm_loss": 0.0, "num_token_doc": 66.8832, "num_token_overlap": 15.8004, "num_token_query": 42.2716, "num_token_union": 68.5104, "num_word_context": 202.1206, "num_word_doc": 49.8892, "num_word_query": 31.9161, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10437.4475, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4751, "query_norm": 1.9848, "queue_k_norm": 3.295, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2716, "sent_len_1": 66.8832, "sent_len_max_0": 127.99, "sent_len_max_1": 190.85, "stdk": 0.063, "stdq": 0.0565, "stdqueue_k": 0.0631, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 30.3711, "active_queue_size": 16384.0, "cl_loss": 6.0974, "doc_norm": 3.1905, "encoder_q-embeddings": 2207.8726, "encoder_q-layer.0": 1859.6976, "encoder_q-layer.1": 2046.5437, "encoder_q-layer.10": 20157.8906, "encoder_q-layer.11": 15728.8467, "encoder_q-layer.2": 2370.3096, "encoder_q-layer.3": 2591.5957, "encoder_q-layer.4": 3174.6306, "encoder_q-layer.5": 3630.1101, "encoder_q-layer.6": 5068.375, "encoder_q-layer.7": 7156.6567, "encoder_q-layer.8": 8937.4092, "encoder_q-layer.9": 13204.7617, "epoch": 0.04, "inbatch_neg_score": 0.5391, "inbatch_pos_score": 1.0488, "learning_rate": 1.8e-05, "loss": 6.0974, "norm_diff": 1.2349, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 15.8339, "num_token_query": 42.5082, "num_token_union": 68.5774, "num_word_context": 202.5946, "num_word_doc": 49.8153, "num_word_query": 32.1215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11394.5008, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5327, "query_norm": 1.9556, "queue_k_norm": 3.1924, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5082, "sent_len_1": 66.7393, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.8125, "stdk": 0.0621, "stdq": 0.0558, "stdqueue_k": 0.0624, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 32.6172, "active_queue_size": 16384.0, "cl_loss": 6.3787, "doc_norm": 3.1079, "encoder_q-embeddings": 1886.6761, "encoder_q-layer.0": 1535.5194, "encoder_q-layer.1": 1870.1267, "encoder_q-layer.10": 35957.5508, "encoder_q-layer.11": 26655.9219, "encoder_q-layer.2": 2337.8281, "encoder_q-layer.3": 2905.5549, "encoder_q-layer.4": 4222.1475, "encoder_q-layer.5": 6105.2515, "encoder_q-layer.6": 8682.415, "encoder_q-layer.7": 11685.5664, "encoder_q-layer.8": 15587.0371, "encoder_q-layer.9": 22149.168, "epoch": 0.04, "inbatch_neg_score": 0.3993, "inbatch_pos_score": 0.9224, "learning_rate": 1.85e-05, "loss": 6.3787, "norm_diff": 1.1461, "norm_loss": 0.0, "num_token_doc": 66.8237, "num_token_overlap": 15.9124, "num_token_query": 42.5215, "num_token_union": 68.5373, "num_word_context": 202.33, "num_word_doc": 49.8443, "num_word_query": 32.122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19319.1359, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3926, "query_norm": 1.9618, "queue_k_norm": 3.1082, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5215, "sent_len_1": 66.8237, "sent_len_max_0": 128.0, "sent_len_max_1": 188.945, "stdk": 0.0615, "stdq": 0.0541, "stdqueue_k": 0.0621, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 35.0586, "active_queue_size": 16384.0, "cl_loss": 5.9223, "doc_norm": 3.0269, "encoder_q-embeddings": 1504.6759, "encoder_q-layer.0": 1252.1221, "encoder_q-layer.1": 1492.3783, "encoder_q-layer.10": 18006.2188, "encoder_q-layer.11": 14320.5381, "encoder_q-layer.2": 1873.8372, "encoder_q-layer.3": 2199.2466, "encoder_q-layer.4": 3013.8313, "encoder_q-layer.5": 4359.6211, "encoder_q-layer.6": 6411.3584, "encoder_q-layer.7": 8369.9316, "encoder_q-layer.8": 9807.2783, "encoder_q-layer.9": 11979.5664, "epoch": 0.04, "inbatch_neg_score": 0.7137, "inbatch_pos_score": 1.2393, "learning_rate": 1.9e-05, "loss": 5.9223, "norm_diff": 1.1198, "norm_loss": 0.0, "num_token_doc": 66.888, "num_token_overlap": 15.774, "num_token_query": 42.3403, "num_token_union": 68.5927, "num_word_context": 202.181, "num_word_doc": 49.8914, "num_word_query": 31.9987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10864.2847, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7046, "query_norm": 1.9071, "queue_k_norm": 3.0269, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3403, "sent_len_1": 66.888, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3887, "stdk": 0.0608, "stdq": 0.0537, "stdqueue_k": 0.0617, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 28.8086, "active_queue_size": 16384.0, "cl_loss": 5.7707, "doc_norm": 2.9465, "encoder_q-embeddings": 5620.731, "encoder_q-layer.0": 4954.9321, "encoder_q-layer.1": 4899.4536, "encoder_q-layer.10": 36704.8125, "encoder_q-layer.11": 32727.6738, "encoder_q-layer.2": 5597.5396, "encoder_q-layer.3": 4897.5845, "encoder_q-layer.4": 5009.7026, "encoder_q-layer.5": 6130.6958, "encoder_q-layer.6": 8853.9395, "encoder_q-layer.7": 13338.8691, "encoder_q-layer.8": 18062.4023, "encoder_q-layer.9": 25838.5, "epoch": 0.04, "inbatch_neg_score": 0.8168, "inbatch_pos_score": 1.333, "learning_rate": 1.9500000000000003e-05, "loss": 5.7707, "norm_diff": 1.0319, "norm_loss": 0.0, "num_token_doc": 66.7612, "num_token_overlap": 15.8561, "num_token_query": 42.3741, "num_token_union": 68.535, "num_word_context": 202.4845, "num_word_doc": 49.814, "num_word_query": 32.0123, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22237.9988, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.8105, "query_norm": 1.9146, "queue_k_norm": 2.946, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3741, "sent_len_1": 66.7612, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8812, "stdk": 0.0615, "stdq": 0.0531, "stdqueue_k": 0.0612, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 36.5234, "active_queue_size": 16384.0, "cl_loss": 5.5599, "doc_norm": 2.8587, "encoder_q-embeddings": 1071.1394, "encoder_q-layer.0": 893.6576, "encoder_q-layer.1": 934.4914, "encoder_q-layer.10": 2434.6128, "encoder_q-layer.11": 3727.9133, "encoder_q-layer.2": 1059.3168, "encoder_q-layer.3": 1094.0322, "encoder_q-layer.4": 1118.5338, "encoder_q-layer.5": 1198.373, "encoder_q-layer.6": 1241.807, "encoder_q-layer.7": 1305.2252, "encoder_q-layer.8": 1583.0417, "encoder_q-layer.9": 1747.1257, "epoch": 0.04, "inbatch_neg_score": 0.5825, "inbatch_pos_score": 1.123, "learning_rate": 2e-05, "loss": 5.5599, "norm_diff": 1.0304, "norm_loss": 0.0, "num_token_doc": 66.9285, "num_token_overlap": 15.7696, "num_token_query": 42.3541, "num_token_union": 68.6734, "num_word_context": 202.8391, "num_word_doc": 49.9552, "num_word_query": 31.9946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2324.1638, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5811, "query_norm": 1.8284, "queue_k_norm": 2.8631, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3541, "sent_len_1": 66.9285, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1213, "stdk": 0.0605, "stdq": 0.0512, "stdqueue_k": 0.0605, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 35.4492, "active_queue_size": 16384.0, "cl_loss": 5.3801, "doc_norm": 2.7651, "encoder_q-embeddings": 1316.5642, "encoder_q-layer.0": 1129.1804, "encoder_q-layer.1": 1179.7969, "encoder_q-layer.10": 8123.4937, "encoder_q-layer.11": 7575.002, "encoder_q-layer.2": 1307.4958, "encoder_q-layer.3": 1281.9384, "encoder_q-layer.4": 1475.8672, "encoder_q-layer.5": 1867.0214, "encoder_q-layer.6": 2325.8455, "encoder_q-layer.7": 3268.8357, "encoder_q-layer.8": 3981.7883, "encoder_q-layer.9": 5138.186, "epoch": 0.04, "inbatch_neg_score": 0.5806, "inbatch_pos_score": 1.0996, "learning_rate": 2.05e-05, "loss": 5.3801, "norm_diff": 0.9818, "norm_loss": 0.0, "num_token_doc": 66.7671, "num_token_overlap": 15.8148, "num_token_query": 42.3362, "num_token_union": 68.4932, "num_word_context": 202.1415, "num_word_doc": 49.8198, "num_word_query": 31.961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5045.8375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5752, "query_norm": 1.7833, "queue_k_norm": 2.7769, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3362, "sent_len_1": 66.7671, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.8837, "stdk": 0.06, "stdq": 0.0502, "stdqueue_k": 0.06, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 5.2733, "doc_norm": 2.6745, "encoder_q-embeddings": 886.6722, "encoder_q-layer.0": 695.8379, "encoder_q-layer.1": 743.4449, "encoder_q-layer.10": 3627.9226, "encoder_q-layer.11": 4591.5938, "encoder_q-layer.2": 811.645, "encoder_q-layer.3": 828.3389, "encoder_q-layer.4": 822.6309, "encoder_q-layer.5": 918.1459, "encoder_q-layer.6": 988.9404, "encoder_q-layer.7": 1120.264, "encoder_q-layer.8": 1486.3032, "encoder_q-layer.9": 1944.4442, "epoch": 0.04, "inbatch_neg_score": 0.4515, "inbatch_pos_score": 0.9819, "learning_rate": 2.1e-05, "loss": 5.2733, "norm_diff": 0.9508, "norm_loss": 0.0, "num_token_doc": 66.7395, "num_token_overlap": 15.8053, "num_token_query": 42.2479, "num_token_union": 68.3893, "num_word_context": 202.1415, "num_word_doc": 49.7674, "num_word_query": 31.9394, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2569.9138, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.447, "query_norm": 1.7237, "queue_k_norm": 2.6847, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2479, "sent_len_1": 66.7395, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.5312, "stdk": 0.059, "stdq": 0.0483, "stdqueue_k": 0.0594, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 5.1096, "doc_norm": 2.5887, "encoder_q-embeddings": 6726.3188, "encoder_q-layer.0": 6684.0469, "encoder_q-layer.1": 7365.5698, "encoder_q-layer.10": 6915.918, "encoder_q-layer.11": 7805.6978, "encoder_q-layer.2": 6836.645, "encoder_q-layer.3": 6359.3149, "encoder_q-layer.4": 5035.0581, "encoder_q-layer.5": 4849.4004, "encoder_q-layer.6": 5287.8994, "encoder_q-layer.7": 6017.7852, "encoder_q-layer.8": 6176.9399, "encoder_q-layer.9": 5567.4941, "epoch": 0.04, "inbatch_neg_score": 0.4435, "inbatch_pos_score": 0.9795, "learning_rate": 2.15e-05, "loss": 5.1096, "norm_diff": 0.8669, "norm_loss": 0.0, "num_token_doc": 66.6239, "num_token_overlap": 15.788, "num_token_query": 42.2757, "num_token_union": 68.4071, "num_word_context": 202.1841, "num_word_doc": 49.7285, "num_word_query": 31.9165, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9179.2196, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4387, "query_norm": 1.7218, "queue_k_norm": 2.5921, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2757, "sent_len_1": 66.6239, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6075, "stdk": 0.0586, "stdq": 0.0477, "stdqueue_k": 0.0588, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 36.4258, "active_queue_size": 16384.0, "cl_loss": 4.9951, "doc_norm": 2.4923, "encoder_q-embeddings": 2561.8628, "encoder_q-layer.0": 2054.3999, "encoder_q-layer.1": 2290.3894, "encoder_q-layer.10": 12964.7529, "encoder_q-layer.11": 11523.9053, "encoder_q-layer.2": 2584.8152, "encoder_q-layer.3": 2654.0793, "encoder_q-layer.4": 2904.5471, "encoder_q-layer.5": 3811.1025, "encoder_q-layer.6": 4581.189, "encoder_q-layer.7": 5849.084, "encoder_q-layer.8": 6994.6968, "encoder_q-layer.9": 8631.167, "epoch": 0.04, "inbatch_neg_score": 0.3978, "inbatch_pos_score": 0.9297, "learning_rate": 2.2000000000000003e-05, "loss": 4.9951, "norm_diff": 0.8027, "norm_loss": 0.0, "num_token_doc": 66.6445, "num_token_overlap": 15.8652, "num_token_query": 42.5354, "num_token_union": 68.5212, "num_word_context": 202.1348, "num_word_doc": 49.7688, "num_word_query": 32.1402, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8547.677, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.394, "query_norm": 1.6896, "queue_k_norm": 2.5006, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5354, "sent_len_1": 66.6445, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5062, "stdk": 0.0576, "stdq": 0.0464, "stdqueue_k": 0.0581, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.9039, "doc_norm": 2.4141, "encoder_q-embeddings": 3317.4097, "encoder_q-layer.0": 2779.2097, "encoder_q-layer.1": 2936.9773, "encoder_q-layer.10": 5308.1226, "encoder_q-layer.11": 6630.6694, "encoder_q-layer.2": 2977.197, "encoder_q-layer.3": 2982.5537, "encoder_q-layer.4": 2847.7764, "encoder_q-layer.5": 2891.3848, "encoder_q-layer.6": 3491.3418, "encoder_q-layer.7": 4142.3857, "encoder_q-layer.8": 4001.1272, "encoder_q-layer.9": 4072.8052, "epoch": 0.04, "inbatch_neg_score": 0.4952, "inbatch_pos_score": 1.0586, "learning_rate": 2.25e-05, "loss": 4.9039, "norm_diff": 0.7042, "norm_loss": 0.0, "num_token_doc": 66.8951, "num_token_overlap": 15.7906, "num_token_query": 42.1917, "num_token_union": 68.4824, "num_word_context": 202.4507, "num_word_doc": 49.8723, "num_word_query": 31.8453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5520.4823, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4958, "query_norm": 1.7099, "queue_k_norm": 2.4102, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1917, "sent_len_1": 66.8951, "sent_len_max_0": 127.9862, "sent_len_max_1": 189.8963, "stdk": 0.0573, "stdq": 0.0475, "stdqueue_k": 0.0573, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 37.5977, "active_queue_size": 16384.0, "cl_loss": 4.8207, "doc_norm": 2.3267, "encoder_q-embeddings": 2407.3411, "encoder_q-layer.0": 2013.9594, "encoder_q-layer.1": 2098.7959, "encoder_q-layer.10": 4792.2603, "encoder_q-layer.11": 6444.5562, "encoder_q-layer.2": 2336.6694, "encoder_q-layer.3": 2325.2822, "encoder_q-layer.4": 2194.416, "encoder_q-layer.5": 2198.3342, "encoder_q-layer.6": 2732.4812, "encoder_q-layer.7": 3400.7908, "encoder_q-layer.8": 4219.6938, "encoder_q-layer.9": 3142.0776, "epoch": 0.04, "inbatch_neg_score": 0.5186, "inbatch_pos_score": 1.0469, "learning_rate": 2.3000000000000003e-05, "loss": 4.8207, "norm_diff": 0.617, "norm_loss": 0.0, "num_token_doc": 66.9074, "num_token_overlap": 15.9066, "num_token_query": 42.4509, "num_token_union": 68.5519, "num_word_context": 202.367, "num_word_doc": 49.8638, "num_word_query": 32.0638, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4681.7541, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5146, "query_norm": 1.7097, "queue_k_norm": 2.3279, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4509, "sent_len_1": 66.9074, "sent_len_max_0": 127.995, "sent_len_max_1": 191.28, "stdk": 0.0569, "stdq": 0.0476, "stdqueue_k": 0.0568, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 39.7461, "active_queue_size": 16384.0, "cl_loss": 4.7477, "doc_norm": 2.2485, "encoder_q-embeddings": 2840.9336, "encoder_q-layer.0": 2334.6052, "encoder_q-layer.1": 2328.3955, "encoder_q-layer.10": 2522.7319, "encoder_q-layer.11": 4908.7104, "encoder_q-layer.2": 2514.5649, "encoder_q-layer.3": 2521.1367, "encoder_q-layer.4": 2395.584, "encoder_q-layer.5": 2333.5879, "encoder_q-layer.6": 1968.8038, "encoder_q-layer.7": 1824.4563, "encoder_q-layer.8": 1969.9868, "encoder_q-layer.9": 1753.5587, "epoch": 0.05, "inbatch_neg_score": 0.5076, "inbatch_pos_score": 1.04, "learning_rate": 2.35e-05, "loss": 4.7477, "norm_diff": 0.565, "norm_loss": 0.0, "num_token_doc": 66.7472, "num_token_overlap": 15.8064, "num_token_query": 42.3702, "num_token_union": 68.5454, "num_word_context": 202.2811, "num_word_doc": 49.8499, "num_word_query": 31.9946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3810.7115, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5, "query_norm": 1.6835, "queue_k_norm": 2.2535, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3702, "sent_len_1": 66.7472, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.435, "stdk": 0.0559, "stdq": 0.0469, "stdqueue_k": 0.0562, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.7099, "doc_norm": 2.1675, "encoder_q-embeddings": 1975.848, "encoder_q-layer.0": 1660.9788, "encoder_q-layer.1": 1663.4358, "encoder_q-layer.10": 5849.0361, "encoder_q-layer.11": 7496.8945, "encoder_q-layer.2": 1867.1919, "encoder_q-layer.3": 1945.3909, "encoder_q-layer.4": 1979.1223, "encoder_q-layer.5": 2297.1323, "encoder_q-layer.6": 2855.8315, "encoder_q-layer.7": 3690.4653, "encoder_q-layer.8": 4333.0479, "encoder_q-layer.9": 5027.6133, "epoch": 0.05, "inbatch_neg_score": 0.5391, "inbatch_pos_score": 1.0771, "learning_rate": 2.4e-05, "loss": 4.7099, "norm_diff": 0.4832, "norm_loss": 0.0, "num_token_doc": 66.7315, "num_token_overlap": 15.8098, "num_token_query": 42.3451, "num_token_union": 68.4631, "num_word_context": 202.0932, "num_word_doc": 49.7629, "num_word_query": 31.9875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5135.0901, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5322, "query_norm": 1.6843, "queue_k_norm": 2.1772, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3451, "sent_len_1": 66.7315, "sent_len_max_0": 128.0, "sent_len_max_1": 188.985, "stdk": 0.055, "stdq": 0.0461, "stdqueue_k": 0.0553, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.6531, "doc_norm": 2.1064, "encoder_q-embeddings": 2174.4895, "encoder_q-layer.0": 1853.6797, "encoder_q-layer.1": 1762.4976, "encoder_q-layer.10": 3302.1953, "encoder_q-layer.11": 5357.915, "encoder_q-layer.2": 2022.0867, "encoder_q-layer.3": 2045.3215, "encoder_q-layer.4": 1969.1378, "encoder_q-layer.5": 1750.3938, "encoder_q-layer.6": 1938.5804, "encoder_q-layer.7": 2488.4727, "encoder_q-layer.8": 2921.4441, "encoder_q-layer.9": 2606.5762, "epoch": 0.05, "inbatch_neg_score": 0.5923, "inbatch_pos_score": 1.1113, "learning_rate": 2.45e-05, "loss": 4.6531, "norm_diff": 0.4152, "norm_loss": 0.0, "num_token_doc": 66.6837, "num_token_overlap": 15.761, "num_token_query": 42.1691, "num_token_union": 68.366, "num_word_context": 202.4099, "num_word_doc": 49.7512, "num_word_query": 31.8553, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3779.8842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5845, "query_norm": 1.6912, "queue_k_norm": 2.1178, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1691, "sent_len_1": 66.6837, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.8113, "stdk": 0.0546, "stdq": 0.0462, "stdqueue_k": 0.0549, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.5613, "doc_norm": 2.0555, "encoder_q-embeddings": 1307.8779, "encoder_q-layer.0": 1015.5703, "encoder_q-layer.1": 1141.0109, "encoder_q-layer.10": 5677.0703, "encoder_q-layer.11": 6839.0669, "encoder_q-layer.2": 1316.1377, "encoder_q-layer.3": 1333.999, "encoder_q-layer.4": 1447.0997, "encoder_q-layer.5": 1628.7611, "encoder_q-layer.6": 2205.095, "encoder_q-layer.7": 3423.7942, "encoder_q-layer.8": 4610.9141, "encoder_q-layer.9": 5230.0225, "epoch": 0.05, "inbatch_neg_score": 0.6008, "inbatch_pos_score": 1.1572, "learning_rate": 2.5e-05, "loss": 4.5613, "norm_diff": 0.3352, "norm_loss": 0.0, "num_token_doc": 66.8347, "num_token_overlap": 15.8794, "num_token_query": 42.3291, "num_token_union": 68.4848, "num_word_context": 202.4573, "num_word_doc": 49.8734, "num_word_query": 31.9822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4672.0945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5947, "query_norm": 1.7203, "queue_k_norm": 2.0611, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3291, "sent_len_1": 66.8347, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.2812, "stdk": 0.0539, "stdq": 0.0475, "stdqueue_k": 0.0543, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.5331, "doc_norm": 2.0078, "encoder_q-embeddings": 1584.6381, "encoder_q-layer.0": 1204.4464, "encoder_q-layer.1": 1317.2885, "encoder_q-layer.10": 5264.1128, "encoder_q-layer.11": 6145.625, "encoder_q-layer.2": 1477.8715, "encoder_q-layer.3": 1494.2168, "encoder_q-layer.4": 1793.4347, "encoder_q-layer.5": 2254.9868, "encoder_q-layer.6": 3156.8213, "encoder_q-layer.7": 4532.3052, "encoder_q-layer.8": 5273.4897, "encoder_q-layer.9": 4915.9707, "epoch": 0.05, "inbatch_neg_score": 0.6152, "inbatch_pos_score": 1.1914, "learning_rate": 2.5500000000000003e-05, "loss": 4.5331, "norm_diff": 0.2814, "norm_loss": 0.0, "num_token_doc": 66.6325, "num_token_overlap": 15.8659, "num_token_query": 42.5176, "num_token_union": 68.5244, "num_word_context": 202.0513, "num_word_doc": 49.7103, "num_word_query": 32.0939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4883.6155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6147, "query_norm": 1.7265, "queue_k_norm": 2.0143, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5176, "sent_len_1": 66.6325, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0762, "stdk": 0.0536, "stdq": 0.0469, "stdqueue_k": 0.0538, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.5067, "doc_norm": 1.9711, "encoder_q-embeddings": 1917.1038, "encoder_q-layer.0": 1652.3655, "encoder_q-layer.1": 1799.6532, "encoder_q-layer.10": 3972.8394, "encoder_q-layer.11": 4692.4673, "encoder_q-layer.2": 1903.7583, "encoder_q-layer.3": 1717.1372, "encoder_q-layer.4": 1564.1278, "encoder_q-layer.5": 1807.8804, "encoder_q-layer.6": 2452.8093, "encoder_q-layer.7": 3185.8289, "encoder_q-layer.8": 3558.6196, "encoder_q-layer.9": 3336.7456, "epoch": 0.05, "inbatch_neg_score": 0.6516, "inbatch_pos_score": 1.2314, "learning_rate": 2.6000000000000002e-05, "loss": 4.5067, "norm_diff": 0.2655, "norm_loss": 0.0, "num_token_doc": 67.1015, "num_token_overlap": 15.8451, "num_token_query": 42.4745, "num_token_union": 68.7388, "num_word_context": 202.6512, "num_word_doc": 50.0804, "num_word_query": 32.0864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3876.5225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6504, "query_norm": 1.7056, "queue_k_norm": 1.9712, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4745, "sent_len_1": 67.1015, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1362, "stdk": 0.0532, "stdq": 0.0462, "stdqueue_k": 0.0532, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.4884, "doc_norm": 1.9281, "encoder_q-embeddings": 1648.6538, "encoder_q-layer.0": 1315.7472, "encoder_q-layer.1": 1447.7875, "encoder_q-layer.10": 2348.4717, "encoder_q-layer.11": 3918.095, "encoder_q-layer.2": 1539.7635, "encoder_q-layer.3": 1619.2063, "encoder_q-layer.4": 1488.2131, "encoder_q-layer.5": 1442.8054, "encoder_q-layer.6": 1433.3456, "encoder_q-layer.7": 1442.0779, "encoder_q-layer.8": 1721.0687, "encoder_q-layer.9": 1722.5681, "epoch": 0.05, "inbatch_neg_score": 0.6539, "inbatch_pos_score": 1.2109, "learning_rate": 2.6500000000000004e-05, "loss": 4.4884, "norm_diff": 0.2201, "norm_loss": 0.0, "num_token_doc": 66.5383, "num_token_overlap": 15.8546, "num_token_query": 42.2744, "num_token_union": 68.2736, "num_word_context": 202.1871, "num_word_doc": 49.6742, "num_word_query": 31.9325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2734.3415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6484, "query_norm": 1.708, "queue_k_norm": 1.9411, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2744, "sent_len_1": 66.5383, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2512, "stdk": 0.0523, "stdq": 0.0461, "stdqueue_k": 0.0529, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.4273, "doc_norm": 1.9174, "encoder_q-embeddings": 2380.5332, "encoder_q-layer.0": 1950.5867, "encoder_q-layer.1": 2090.7798, "encoder_q-layer.10": 2089.5078, "encoder_q-layer.11": 3872.9165, "encoder_q-layer.2": 1934.6486, "encoder_q-layer.3": 1791.1506, "encoder_q-layer.4": 1690.0264, "encoder_q-layer.5": 1569.1976, "encoder_q-layer.6": 1566.2568, "encoder_q-layer.7": 1517.7833, "encoder_q-layer.8": 1590.491, "encoder_q-layer.9": 1471.6191, "epoch": 0.05, "inbatch_neg_score": 0.6695, "inbatch_pos_score": 1.2266, "learning_rate": 2.7000000000000002e-05, "loss": 4.4273, "norm_diff": 0.1997, "norm_loss": 0.0, "num_token_doc": 66.5977, "num_token_overlap": 15.7819, "num_token_query": 42.2535, "num_token_union": 68.3752, "num_word_context": 202.0667, "num_word_doc": 49.66, "num_word_query": 31.9257, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3057.2807, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6675, "query_norm": 1.7177, "queue_k_norm": 1.9127, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2535, "sent_len_1": 66.5977, "sent_len_max_0": 128.0, "sent_len_max_1": 190.815, "stdk": 0.0526, "stdq": 0.0464, "stdqueue_k": 0.0525, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.4158, "doc_norm": 1.8855, "encoder_q-embeddings": 3254.552, "encoder_q-layer.0": 2736.4895, "encoder_q-layer.1": 2784.6152, "encoder_q-layer.10": 4877.064, "encoder_q-layer.11": 5104.5347, "encoder_q-layer.2": 3176.582, "encoder_q-layer.3": 3070.0791, "encoder_q-layer.4": 3131.4927, "encoder_q-layer.5": 2952.168, "encoder_q-layer.6": 2852.4136, "encoder_q-layer.7": 2540.4548, "encoder_q-layer.8": 3474.1958, "encoder_q-layer.9": 4437.4067, "epoch": 0.05, "inbatch_neg_score": 0.722, "inbatch_pos_score": 1.2793, "learning_rate": 2.7500000000000004e-05, "loss": 4.4158, "norm_diff": 0.1371, "norm_loss": 0.0, "num_token_doc": 66.6115, "num_token_overlap": 15.7025, "num_token_query": 42.094, "num_token_union": 68.3539, "num_word_context": 201.989, "num_word_doc": 49.6707, "num_word_query": 31.7815, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4951.7087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7173, "query_norm": 1.7484, "queue_k_norm": 1.8881, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.094, "sent_len_1": 66.6115, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3338, "stdk": 0.0519, "stdq": 0.0471, "stdqueue_k": 0.052, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.3698, "doc_norm": 1.8601, "encoder_q-embeddings": 5377.9883, "encoder_q-layer.0": 5362.2808, "encoder_q-layer.1": 6007.1646, "encoder_q-layer.10": 2563.8831, "encoder_q-layer.11": 4151.9004, "encoder_q-layer.2": 6298.7427, "encoder_q-layer.3": 6446.0938, "encoder_q-layer.4": 6365.6479, "encoder_q-layer.5": 6230.7236, "encoder_q-layer.6": 5766.3057, "encoder_q-layer.7": 3202.1279, "encoder_q-layer.8": 2293.3696, "encoder_q-layer.9": 1755.8053, "epoch": 0.05, "inbatch_neg_score": 0.7248, "inbatch_pos_score": 1.3037, "learning_rate": 2.8000000000000003e-05, "loss": 4.3698, "norm_diff": 0.0958, "norm_loss": 0.0, "num_token_doc": 66.8182, "num_token_overlap": 15.7422, "num_token_query": 42.128, "num_token_union": 68.4371, "num_word_context": 202.4451, "num_word_doc": 49.849, "num_word_query": 31.8212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7486.4664, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7207, "query_norm": 1.7643, "queue_k_norm": 1.8681, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.128, "sent_len_1": 66.8182, "sent_len_max_0": 127.9862, "sent_len_max_1": 190.4363, "stdk": 0.0512, "stdq": 0.0471, "stdqueue_k": 0.0516, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.3432, "doc_norm": 1.8437, "encoder_q-embeddings": 2940.2551, "encoder_q-layer.0": 2359.7581, "encoder_q-layer.1": 1919.9739, "encoder_q-layer.10": 2200.2407, "encoder_q-layer.11": 3527.1787, "encoder_q-layer.2": 1894.2102, "encoder_q-layer.3": 1637.5995, "encoder_q-layer.4": 1687.157, "encoder_q-layer.5": 1550.5737, "encoder_q-layer.6": 1713.7435, "encoder_q-layer.7": 2099.9258, "encoder_q-layer.8": 2325.1692, "encoder_q-layer.9": 2055.249, "epoch": 0.06, "inbatch_neg_score": 0.75, "inbatch_pos_score": 1.3057, "learning_rate": 2.8499999999999998e-05, "loss": 4.3432, "norm_diff": 0.08, "norm_loss": 0.0, "num_token_doc": 66.8203, "num_token_overlap": 15.8194, "num_token_query": 42.2838, "num_token_union": 68.5148, "num_word_context": 202.9347, "num_word_doc": 49.8678, "num_word_query": 31.9416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3298.0033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7471, "query_norm": 1.7638, "queue_k_norm": 1.8535, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2838, "sent_len_1": 66.8203, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7825, "stdk": 0.0508, "stdq": 0.0464, "stdqueue_k": 0.0513, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.2966, "doc_norm": 1.8355, "encoder_q-embeddings": 3801.792, "encoder_q-layer.0": 3245.6365, "encoder_q-layer.1": 3126.3164, "encoder_q-layer.10": 2622.8149, "encoder_q-layer.11": 4034.0334, "encoder_q-layer.2": 3159.752, "encoder_q-layer.3": 2881.0479, "encoder_q-layer.4": 2262.7795, "encoder_q-layer.5": 2021.4398, "encoder_q-layer.6": 1938.53, "encoder_q-layer.7": 1457.8997, "encoder_q-layer.8": 1628.2389, "encoder_q-layer.9": 1667.8589, "epoch": 0.06, "inbatch_neg_score": 0.7734, "inbatch_pos_score": 1.3594, "learning_rate": 2.9e-05, "loss": 4.2966, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.8097, "num_token_overlap": 15.9007, "num_token_query": 42.3491, "num_token_union": 68.4711, "num_word_context": 202.412, "num_word_doc": 49.8382, "num_word_query": 31.9621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4111.7613, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.771, "query_norm": 1.8297, "queue_k_norm": 1.8419, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3491, "sent_len_1": 66.8097, "sent_len_max_0": 127.99, "sent_len_max_1": 189.6175, "stdk": 0.0507, "stdq": 0.0477, "stdqueue_k": 0.051, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.2665, "doc_norm": 1.8301, "encoder_q-embeddings": 6593.6753, "encoder_q-layer.0": 5800.249, "encoder_q-layer.1": 5153.1836, "encoder_q-layer.10": 2907.9355, "encoder_q-layer.11": 4899.5103, "encoder_q-layer.2": 4879.6104, "encoder_q-layer.3": 4756.4561, "encoder_q-layer.4": 3448.0449, "encoder_q-layer.5": 2923.7544, "encoder_q-layer.6": 2184.8032, "encoder_q-layer.7": 1596.9875, "encoder_q-layer.8": 1450.6785, "encoder_q-layer.9": 1565.8777, "epoch": 0.06, "inbatch_neg_score": 0.8231, "inbatch_pos_score": 1.3809, "learning_rate": 2.95e-05, "loss": 4.2665, "norm_diff": 0.041, "norm_loss": 0.0, "num_token_doc": 66.9702, "num_token_overlap": 15.936, "num_token_query": 42.7158, "num_token_union": 68.7455, "num_word_context": 202.5776, "num_word_doc": 49.9857, "num_word_query": 32.2986, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6294.8997, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8184, "query_norm": 1.7891, "queue_k_norm": 1.827, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.7158, "sent_len_1": 66.9702, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5987, "stdk": 0.0505, "stdq": 0.0459, "stdqueue_k": 0.0506, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.2718, "doc_norm": 1.8179, "encoder_q-embeddings": 1650.8918, "encoder_q-layer.0": 1282.8981, "encoder_q-layer.1": 1296.9564, "encoder_q-layer.10": 2876.9385, "encoder_q-layer.11": 4925.5107, "encoder_q-layer.2": 1457.9915, "encoder_q-layer.3": 1502.3234, "encoder_q-layer.4": 1429.6394, "encoder_q-layer.5": 1457.7424, "encoder_q-layer.6": 1819.679, "encoder_q-layer.7": 2358.0503, "encoder_q-layer.8": 2857.4006, "encoder_q-layer.9": 2751.9182, "epoch": 0.06, "inbatch_neg_score": 0.8711, "inbatch_pos_score": 1.416, "learning_rate": 3e-05, "loss": 4.2718, "norm_diff": 0.0283, "norm_loss": 0.0, "num_token_doc": 66.6171, "num_token_overlap": 15.781, "num_token_query": 42.2512, "num_token_union": 68.3825, "num_word_context": 202.1583, "num_word_doc": 49.7191, "num_word_query": 31.9362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3256.3378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8633, "query_norm": 1.8414, "queue_k_norm": 1.8204, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2512, "sent_len_1": 66.6171, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.3675, "stdk": 0.0502, "stdq": 0.0467, "stdqueue_k": 0.0504, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.2073, "doc_norm": 1.8088, "encoder_q-embeddings": 1712.1992, "encoder_q-layer.0": 1274.1622, "encoder_q-layer.1": 1293.1122, "encoder_q-layer.10": 2539.6038, "encoder_q-layer.11": 4693.5332, "encoder_q-layer.2": 1281.9066, "encoder_q-layer.3": 1388.3843, "encoder_q-layer.4": 1275.6525, "encoder_q-layer.5": 1159.6376, "encoder_q-layer.6": 1317.9149, "encoder_q-layer.7": 1385.1326, "encoder_q-layer.8": 1708.2463, "encoder_q-layer.9": 1703.9874, "epoch": 0.06, "inbatch_neg_score": 0.7996, "inbatch_pos_score": 1.3799, "learning_rate": 3.05e-05, "loss": 4.2073, "norm_diff": 0.0305, "norm_loss": 0.0, "num_token_doc": 66.872, "num_token_overlap": 15.8183, "num_token_query": 42.3803, "num_token_union": 68.6052, "num_word_context": 202.338, "num_word_doc": 49.9005, "num_word_query": 32.0096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2903.222, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7964, "query_norm": 1.8393, "queue_k_norm": 1.8124, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3803, "sent_len_1": 66.872, "sent_len_max_0": 128.0, "sent_len_max_1": 189.01, "stdk": 0.05, "stdq": 0.0461, "stdqueue_k": 0.0502, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.1959, "doc_norm": 1.8056, "encoder_q-embeddings": 1248.79, "encoder_q-layer.0": 947.273, "encoder_q-layer.1": 950.1312, "encoder_q-layer.10": 2017.165, "encoder_q-layer.11": 3277.8142, "encoder_q-layer.2": 1090.552, "encoder_q-layer.3": 1113.6343, "encoder_q-layer.4": 1109.5786, "encoder_q-layer.5": 1120.924, "encoder_q-layer.6": 1336.2035, "encoder_q-layer.7": 1369.8142, "encoder_q-layer.8": 1501.7653, "encoder_q-layer.9": 1433.9021, "epoch": 0.06, "inbatch_neg_score": 0.8232, "inbatch_pos_score": 1.4004, "learning_rate": 3.1e-05, "loss": 4.1959, "norm_diff": 0.0963, "norm_loss": 0.0, "num_token_doc": 66.718, "num_token_overlap": 15.8141, "num_token_query": 42.4182, "num_token_union": 68.5308, "num_word_context": 202.2851, "num_word_doc": 49.8161, "num_word_query": 32.0589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2195.9349, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.814, "query_norm": 1.9019, "queue_k_norm": 1.808, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4182, "sent_len_1": 66.718, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7475, "stdk": 0.0497, "stdq": 0.0479, "stdqueue_k": 0.05, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.1881, "doc_norm": 1.8044, "encoder_q-embeddings": 6401.4341, "encoder_q-layer.0": 4945.4468, "encoder_q-layer.1": 4583.6797, "encoder_q-layer.10": 4285.2661, "encoder_q-layer.11": 7380.9922, "encoder_q-layer.2": 4749.8413, "encoder_q-layer.3": 4598.4429, "encoder_q-layer.4": 3271.7134, "encoder_q-layer.5": 2575.5776, "encoder_q-layer.6": 2661.8066, "encoder_q-layer.7": 2776.8284, "encoder_q-layer.8": 3318.6357, "encoder_q-layer.9": 3090.9636, "epoch": 0.06, "inbatch_neg_score": 0.8446, "inbatch_pos_score": 1.4258, "learning_rate": 3.15e-05, "loss": 4.1881, "norm_diff": 0.1169, "norm_loss": 0.0, "num_token_doc": 66.7793, "num_token_overlap": 15.8079, "num_token_query": 42.2448, "num_token_union": 68.445, "num_word_context": 201.7388, "num_word_doc": 49.8181, "num_word_query": 31.9078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6734.0496, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8403, "query_norm": 1.9212, "queue_k_norm": 1.8023, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2448, "sent_len_1": 66.7793, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3812, "stdk": 0.0498, "stdq": 0.0467, "stdqueue_k": 0.0498, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.1326, "doc_norm": 1.7975, "encoder_q-embeddings": 3601.7109, "encoder_q-layer.0": 2720.1047, "encoder_q-layer.1": 2653.0454, "encoder_q-layer.10": 3313.1782, "encoder_q-layer.11": 6571.8452, "encoder_q-layer.2": 2642.9148, "encoder_q-layer.3": 2534.9866, "encoder_q-layer.4": 2594.7808, "encoder_q-layer.5": 2236.9011, "encoder_q-layer.6": 2391.1458, "encoder_q-layer.7": 2250.4766, "encoder_q-layer.8": 2538.2017, "encoder_q-layer.9": 2600.95, "epoch": 0.06, "inbatch_neg_score": 0.8463, "inbatch_pos_score": 1.4307, "learning_rate": 3.2000000000000005e-05, "loss": 4.1326, "norm_diff": 0.1102, "norm_loss": 0.0, "num_token_doc": 67.0662, "num_token_overlap": 15.9712, "num_token_query": 42.6661, "num_token_union": 68.7152, "num_word_context": 202.0432, "num_word_doc": 49.9614, "num_word_query": 32.2093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4654.6747, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8384, "query_norm": 1.9077, "queue_k_norm": 1.7982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.6661, "sent_len_1": 67.0662, "sent_len_max_0": 128.0, "sent_len_max_1": 192.7312, "stdk": 0.0495, "stdq": 0.0463, "stdqueue_k": 0.0496, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.1701, "doc_norm": 1.7896, "encoder_q-embeddings": 2219.52, "encoder_q-layer.0": 1596.4128, "encoder_q-layer.1": 1686.3744, "encoder_q-layer.10": 3886.9663, "encoder_q-layer.11": 7797.7695, "encoder_q-layer.2": 1801.2847, "encoder_q-layer.3": 1884.3688, "encoder_q-layer.4": 1868.9467, "encoder_q-layer.5": 1864.0162, "encoder_q-layer.6": 2316.8572, "encoder_q-layer.7": 2785.1685, "encoder_q-layer.8": 3279.2944, "encoder_q-layer.9": 2813.0864, "epoch": 0.06, "inbatch_neg_score": 0.7969, "inbatch_pos_score": 1.3535, "learning_rate": 3.2500000000000004e-05, "loss": 4.1701, "norm_diff": 0.1125, "norm_loss": 0.0, "num_token_doc": 66.7778, "num_token_overlap": 15.7709, "num_token_query": 42.268, "num_token_union": 68.4679, "num_word_context": 202.1101, "num_word_doc": 49.8357, "num_word_query": 31.8922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4627.6042, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7896, "query_norm": 1.9021, "queue_k_norm": 1.7912, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.268, "sent_len_1": 66.7778, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6, "stdk": 0.0492, "stdq": 0.0462, "stdqueue_k": 0.0494, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.1361, "doc_norm": 1.7886, "encoder_q-embeddings": 7474.2822, "encoder_q-layer.0": 6163.7344, "encoder_q-layer.1": 5605.9453, "encoder_q-layer.10": 4476.9814, "encoder_q-layer.11": 7624.9116, "encoder_q-layer.2": 5413.9956, "encoder_q-layer.3": 5303.4482, "encoder_q-layer.4": 4604.8257, "encoder_q-layer.5": 4255.4219, "encoder_q-layer.6": 3808.6516, "encoder_q-layer.7": 3092.4116, "encoder_q-layer.8": 3554.3245, "encoder_q-layer.9": 3660.2061, "epoch": 0.06, "inbatch_neg_score": 0.7636, "inbatch_pos_score": 1.3271, "learning_rate": 3.3e-05, "loss": 4.1361, "norm_diff": 0.1991, "norm_loss": 0.0, "num_token_doc": 66.6492, "num_token_overlap": 15.8215, "num_token_query": 42.4077, "num_token_union": 68.4904, "num_word_context": 202.2937, "num_word_doc": 49.7459, "num_word_query": 32.0126, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7896.1061, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7554, "query_norm": 1.9877, "queue_k_norm": 1.7864, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4077, "sent_len_1": 66.6492, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.9437, "stdk": 0.0492, "stdq": 0.047, "stdqueue_k": 0.0493, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.0689, "doc_norm": 1.7817, "encoder_q-embeddings": 19441.8223, "encoder_q-layer.0": 16918.8379, "encoder_q-layer.1": 21448.873, "encoder_q-layer.10": 6057.395, "encoder_q-layer.11": 10638.7783, "encoder_q-layer.2": 17161.0039, "encoder_q-layer.3": 17020.1621, "encoder_q-layer.4": 18822.6816, "encoder_q-layer.5": 17968.5371, "encoder_q-layer.6": 20763.6953, "encoder_q-layer.7": 11425.3633, "encoder_q-layer.8": 6040.4653, "encoder_q-layer.9": 4912.3462, "epoch": 0.07, "inbatch_neg_score": 0.7975, "inbatch_pos_score": 1.3516, "learning_rate": 3.35e-05, "loss": 4.0689, "norm_diff": 0.2419, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 15.8741, "num_token_query": 42.4612, "num_token_union": 68.594, "num_word_context": 202.3733, "num_word_doc": 49.8131, "num_word_query": 32.0832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 24050.7857, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.7891, "query_norm": 2.0237, "queue_k_norm": 1.7809, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4612, "sent_len_1": 66.8218, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.1937, "stdk": 0.0491, "stdq": 0.046, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.085, "doc_norm": 1.7702, "encoder_q-embeddings": 9283.9912, "encoder_q-layer.0": 8841.2119, "encoder_q-layer.1": 7196.1587, "encoder_q-layer.10": 3638.4148, "encoder_q-layer.11": 6656.2974, "encoder_q-layer.2": 7911.9678, "encoder_q-layer.3": 7654.9663, "encoder_q-layer.4": 6565.062, "encoder_q-layer.5": 4852.9995, "encoder_q-layer.6": 4022.113, "encoder_q-layer.7": 3233.6387, "encoder_q-layer.8": 3253.2964, "encoder_q-layer.9": 3012.6863, "epoch": 0.07, "inbatch_neg_score": 0.7533, "inbatch_pos_score": 1.3125, "learning_rate": 3.4000000000000007e-05, "loss": 4.085, "norm_diff": 0.294, "norm_loss": 0.0, "num_token_doc": 66.5109, "num_token_overlap": 15.7782, "num_token_query": 42.3166, "num_token_union": 68.3871, "num_word_context": 202.0251, "num_word_doc": 49.6077, "num_word_query": 31.9832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9688.8144, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7437, "query_norm": 2.0642, "queue_k_norm": 1.7745, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3166, "sent_len_1": 66.5109, "sent_len_max_0": 127.9975, "sent_len_max_1": 186.8438, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.0368, "doc_norm": 1.7775, "encoder_q-embeddings": 11853.5625, "encoder_q-layer.0": 9849.8457, "encoder_q-layer.1": 9691.7383, "encoder_q-layer.10": 3838.3484, "encoder_q-layer.11": 6681.3794, "encoder_q-layer.2": 10190.1904, "encoder_q-layer.3": 10288.0156, "encoder_q-layer.4": 10099.8467, "encoder_q-layer.5": 10240.3428, "encoder_q-layer.6": 11655.7012, "encoder_q-layer.7": 8139.9009, "encoder_q-layer.8": 4117.2192, "encoder_q-layer.9": 3163.9822, "epoch": 0.07, "inbatch_neg_score": 0.7765, "inbatch_pos_score": 1.3857, "learning_rate": 3.45e-05, "loss": 4.0368, "norm_diff": 0.3649, "norm_loss": 0.0, "num_token_doc": 66.9228, "num_token_overlap": 15.9737, "num_token_query": 42.6031, "num_token_union": 68.5999, "num_word_context": 202.5507, "num_word_doc": 49.9492, "num_word_query": 32.1873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13447.6459, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7666, "query_norm": 2.1423, "queue_k_norm": 1.7696, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.6031, "sent_len_1": 66.9228, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.435, "stdk": 0.0493, "stdq": 0.0474, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.0264, "doc_norm": 1.7729, "encoder_q-embeddings": 2275.6184, "encoder_q-layer.0": 1633.2327, "encoder_q-layer.1": 1627.5854, "encoder_q-layer.10": 3294.9395, "encoder_q-layer.11": 6394.6655, "encoder_q-layer.2": 1754.9623, "encoder_q-layer.3": 1745.7313, "encoder_q-layer.4": 1793.7872, "encoder_q-layer.5": 1694.517, "encoder_q-layer.6": 1872.4866, "encoder_q-layer.7": 2003.9261, "encoder_q-layer.8": 2437.6453, "encoder_q-layer.9": 2328.3237, "epoch": 0.07, "inbatch_neg_score": 0.7651, "inbatch_pos_score": 1.3418, "learning_rate": 3.5e-05, "loss": 4.0264, "norm_diff": 0.3725, "norm_loss": 0.0, "num_token_doc": 66.8482, "num_token_overlap": 15.9221, "num_token_query": 42.3645, "num_token_union": 68.4554, "num_word_context": 202.077, "num_word_doc": 49.8774, "num_word_query": 31.993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3950.0215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7534, "query_norm": 2.1454, "queue_k_norm": 1.7627, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3645, "sent_len_1": 66.8482, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.0087, "stdk": 0.0491, "stdq": 0.0466, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.0216, "doc_norm": 1.7534, "encoder_q-embeddings": 2795.5322, "encoder_q-layer.0": 2081.302, "encoder_q-layer.1": 2237.3184, "encoder_q-layer.10": 3928.4048, "encoder_q-layer.11": 6618.9658, "encoder_q-layer.2": 2753.5149, "encoder_q-layer.3": 2791.7935, "encoder_q-layer.4": 2584.8508, "encoder_q-layer.5": 2462.5039, "encoder_q-layer.6": 2484.2188, "encoder_q-layer.7": 2949.3572, "encoder_q-layer.8": 3841.0212, "encoder_q-layer.9": 3588.3735, "epoch": 0.07, "inbatch_neg_score": 0.8003, "inbatch_pos_score": 1.3701, "learning_rate": 3.55e-05, "loss": 4.0216, "norm_diff": 0.3998, "norm_loss": 0.0, "num_token_doc": 66.7236, "num_token_overlap": 15.8829, "num_token_query": 42.4399, "num_token_union": 68.5014, "num_word_context": 202.5887, "num_word_doc": 49.8122, "num_word_query": 32.0609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4807.9798, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7876, "query_norm": 2.1532, "queue_k_norm": 1.7588, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4399, "sent_len_1": 66.7236, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0563, "stdk": 0.0486, "stdq": 0.0465, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9981, "doc_norm": 1.7598, "encoder_q-embeddings": 2154.6121, "encoder_q-layer.0": 1517.4257, "encoder_q-layer.1": 1651.1917, "encoder_q-layer.10": 5563.3237, "encoder_q-layer.11": 10046.2568, "encoder_q-layer.2": 1870.0764, "encoder_q-layer.3": 1964.4467, "encoder_q-layer.4": 2055.6584, "encoder_q-layer.5": 2103.9641, "encoder_q-layer.6": 2355.8418, "encoder_q-layer.7": 2716.6797, "encoder_q-layer.8": 3137.7146, "encoder_q-layer.9": 3522.9834, "epoch": 0.07, "inbatch_neg_score": 0.7104, "inbatch_pos_score": 1.2861, "learning_rate": 3.6e-05, "loss": 3.9981, "norm_diff": 0.3494, "norm_loss": 0.0, "num_token_doc": 66.8415, "num_token_overlap": 15.8797, "num_token_query": 42.4201, "num_token_union": 68.5692, "num_word_context": 202.6156, "num_word_doc": 49.9089, "num_word_query": 32.0517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5497.5156, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7026, "query_norm": 2.1092, "queue_k_norm": 1.7538, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4201, "sent_len_1": 66.8415, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.915, "stdk": 0.0488, "stdq": 0.0467, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.9794, "doc_norm": 1.7373, "encoder_q-embeddings": 7863.8506, "encoder_q-layer.0": 6294.4653, "encoder_q-layer.1": 7057.4976, "encoder_q-layer.10": 4353.7764, "encoder_q-layer.11": 7972.2168, "encoder_q-layer.2": 7649.1118, "encoder_q-layer.3": 7330.8652, "encoder_q-layer.4": 7147.5889, "encoder_q-layer.5": 7274.98, "encoder_q-layer.6": 6258.2393, "encoder_q-layer.7": 4063.1677, "encoder_q-layer.8": 2748.4668, "encoder_q-layer.9": 2443.3718, "epoch": 0.07, "inbatch_neg_score": 0.7184, "inbatch_pos_score": 1.2939, "learning_rate": 3.65e-05, "loss": 3.9794, "norm_diff": 0.342, "norm_loss": 0.0, "num_token_doc": 66.7527, "num_token_overlap": 15.9334, "num_token_query": 42.5441, "num_token_union": 68.5205, "num_word_context": 202.597, "num_word_doc": 49.8694, "num_word_query": 32.1279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9456.8833, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7046, "query_norm": 2.0792, "queue_k_norm": 1.7502, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5441, "sent_len_1": 66.7527, "sent_len_max_0": 128.0, "sent_len_max_1": 186.5175, "stdk": 0.0482, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.0164, "doc_norm": 1.7402, "encoder_q-embeddings": 2000.1058, "encoder_q-layer.0": 1453.697, "encoder_q-layer.1": 1520.79, "encoder_q-layer.10": 3514.5186, "encoder_q-layer.11": 6665.7832, "encoder_q-layer.2": 1768.7942, "encoder_q-layer.3": 1845.662, "encoder_q-layer.4": 1863.5808, "encoder_q-layer.5": 1810.4906, "encoder_q-layer.6": 1905.8413, "encoder_q-layer.7": 2231.1531, "encoder_q-layer.8": 2748.717, "encoder_q-layer.9": 2850.6643, "epoch": 0.07, "inbatch_neg_score": 0.6401, "inbatch_pos_score": 1.2188, "learning_rate": 3.7e-05, "loss": 4.0164, "norm_diff": 0.3224, "norm_loss": 0.0, "num_token_doc": 66.5665, "num_token_overlap": 15.7391, "num_token_query": 42.2245, "num_token_union": 68.4006, "num_word_context": 202.2311, "num_word_doc": 49.663, "num_word_query": 31.9026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4013.7468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6279, "query_norm": 2.0626, "queue_k_norm": 1.7414, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2245, "sent_len_1": 66.5665, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.4963, "stdk": 0.0484, "stdq": 0.0472, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9615, "doc_norm": 1.7304, "encoder_q-embeddings": 6621.584, "encoder_q-layer.0": 5763.145, "encoder_q-layer.1": 6263.5806, "encoder_q-layer.10": 2822.3057, "encoder_q-layer.11": 5989.9868, "encoder_q-layer.2": 7788.6006, "encoder_q-layer.3": 7510.2612, "encoder_q-layer.4": 8817.3203, "encoder_q-layer.5": 6583.9038, "encoder_q-layer.6": 5972.9263, "encoder_q-layer.7": 3644.9434, "encoder_q-layer.8": 2966.6306, "encoder_q-layer.9": 2523.3667, "epoch": 0.07, "inbatch_neg_score": 0.589, "inbatch_pos_score": 1.166, "learning_rate": 3.7500000000000003e-05, "loss": 3.9615, "norm_diff": 0.3316, "norm_loss": 0.0, "num_token_doc": 66.6432, "num_token_overlap": 15.8392, "num_token_query": 42.3007, "num_token_union": 68.3895, "num_word_context": 202.1657, "num_word_doc": 49.7403, "num_word_query": 31.9691, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8879.9148, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5791, "query_norm": 2.062, "queue_k_norm": 1.7347, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3007, "sent_len_1": 66.6432, "sent_len_max_0": 127.99, "sent_len_max_1": 189.4275, "stdk": 0.0483, "stdq": 0.0472, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.9109, "doc_norm": 1.7321, "encoder_q-embeddings": 1798.92, "encoder_q-layer.0": 1233.6217, "encoder_q-layer.1": 1315.1769, "encoder_q-layer.10": 3352.5862, "encoder_q-layer.11": 7191.7793, "encoder_q-layer.2": 1531.9839, "encoder_q-layer.3": 1577.2104, "encoder_q-layer.4": 1551.7952, "encoder_q-layer.5": 1518.3018, "encoder_q-layer.6": 1688.3883, "encoder_q-layer.7": 1902.9398, "encoder_q-layer.8": 2284.0249, "encoder_q-layer.9": 2190.8083, "epoch": 0.07, "inbatch_neg_score": 0.5305, "inbatch_pos_score": 1.1104, "learning_rate": 3.8e-05, "loss": 3.9109, "norm_diff": 0.2668, "norm_loss": 0.0, "num_token_doc": 66.6477, "num_token_overlap": 15.8426, "num_token_query": 42.4303, "num_token_union": 68.4808, "num_word_context": 202.0786, "num_word_doc": 49.7484, "num_word_query": 32.0349, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4008.7521, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.519, "query_norm": 1.999, "queue_k_norm": 1.725, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4303, "sent_len_1": 66.6477, "sent_len_max_0": 127.9625, "sent_len_max_1": 187.7425, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.9444, "doc_norm": 1.7122, "encoder_q-embeddings": 2249.3972, "encoder_q-layer.0": 1697.0328, "encoder_q-layer.1": 1792.15, "encoder_q-layer.10": 3121.4231, "encoder_q-layer.11": 6442.6738, "encoder_q-layer.2": 1913.3395, "encoder_q-layer.3": 1937.005, "encoder_q-layer.4": 1900.2948, "encoder_q-layer.5": 1727.2599, "encoder_q-layer.6": 1867.9114, "encoder_q-layer.7": 1829.922, "encoder_q-layer.8": 2156.7578, "encoder_q-layer.9": 2243.7473, "epoch": 0.08, "inbatch_neg_score": 0.5148, "inbatch_pos_score": 1.0957, "learning_rate": 3.85e-05, "loss": 3.9444, "norm_diff": 0.3447, "norm_loss": 0.0, "num_token_doc": 66.4572, "num_token_overlap": 15.7793, "num_token_query": 42.2871, "num_token_union": 68.3263, "num_word_context": 202.0498, "num_word_doc": 49.5592, "num_word_query": 31.9295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3948.4688, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5044, "query_norm": 2.0569, "queue_k_norm": 1.7143, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2871, "sent_len_1": 66.4572, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0712, "stdk": 0.0483, "stdq": 0.0459, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9262, "doc_norm": 1.7062, "encoder_q-embeddings": 2855.2109, "encoder_q-layer.0": 2204.3103, "encoder_q-layer.1": 2398.5452, "encoder_q-layer.10": 2800.0469, "encoder_q-layer.11": 6014.2832, "encoder_q-layer.2": 2590.6479, "encoder_q-layer.3": 2423.2791, "encoder_q-layer.4": 2344.3997, "encoder_q-layer.5": 2108.6111, "encoder_q-layer.6": 2046.0, "encoder_q-layer.7": 1913.4357, "encoder_q-layer.8": 2150.8538, "encoder_q-layer.9": 2214.363, "epoch": 0.08, "inbatch_neg_score": 0.5078, "inbatch_pos_score": 1.0752, "learning_rate": 3.9000000000000006e-05, "loss": 3.9262, "norm_diff": 0.3284, "norm_loss": 0.0, "num_token_doc": 66.6593, "num_token_overlap": 15.8347, "num_token_query": 42.4622, "num_token_union": 68.4962, "num_word_context": 202.2458, "num_word_doc": 49.7367, "num_word_query": 32.0972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4190.4955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.498, "query_norm": 2.0345, "queue_k_norm": 1.703, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4622, "sent_len_1": 66.6593, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.555, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.9133, "doc_norm": 1.6888, "encoder_q-embeddings": 2322.8481, "encoder_q-layer.0": 1603.1368, "encoder_q-layer.1": 1639.5322, "encoder_q-layer.10": 2811.3516, "encoder_q-layer.11": 5205.0791, "encoder_q-layer.2": 1768.7714, "encoder_q-layer.3": 1824.527, "encoder_q-layer.4": 1870.394, "encoder_q-layer.5": 1907.5975, "encoder_q-layer.6": 2110.4282, "encoder_q-layer.7": 2225.9067, "encoder_q-layer.8": 2626.5068, "encoder_q-layer.9": 2209.4346, "epoch": 0.08, "inbatch_neg_score": 0.531, "inbatch_pos_score": 1.1367, "learning_rate": 3.9500000000000005e-05, "loss": 3.9133, "norm_diff": 0.3298, "norm_loss": 0.0, "num_token_doc": 66.6451, "num_token_overlap": 15.7956, "num_token_query": 42.2837, "num_token_union": 68.3839, "num_word_context": 202.3354, "num_word_doc": 49.7119, "num_word_query": 31.9601, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3596.2917, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5215, "query_norm": 2.0187, "queue_k_norm": 1.692, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2837, "sent_len_1": 66.6451, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9175, "stdk": 0.0483, "stdq": 0.0463, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.9201, "doc_norm": 1.6812, "encoder_q-embeddings": 1845.1561, "encoder_q-layer.0": 1287.4736, "encoder_q-layer.1": 1379.0509, "encoder_q-layer.10": 2819.6482, "encoder_q-layer.11": 5517.0596, "encoder_q-layer.2": 1486.2356, "encoder_q-layer.3": 1590.1515, "encoder_q-layer.4": 1697.2527, "encoder_q-layer.5": 1649.1602, "encoder_q-layer.6": 1985.9108, "encoder_q-layer.7": 2479.7632, "encoder_q-layer.8": 2831.6204, "encoder_q-layer.9": 2401.4634, "epoch": 0.08, "inbatch_neg_score": 0.5787, "inbatch_pos_score": 1.1494, "learning_rate": 4e-05, "loss": 3.9201, "norm_diff": 0.2534, "norm_loss": 0.0, "num_token_doc": 66.6584, "num_token_overlap": 15.862, "num_token_query": 42.4987, "num_token_union": 68.4586, "num_word_context": 202.4062, "num_word_doc": 49.7395, "num_word_query": 32.1142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3579.7118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5708, "query_norm": 1.9346, "queue_k_norm": 1.6845, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4987, "sent_len_1": 66.6584, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.3413, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.9173, "doc_norm": 1.6798, "encoder_q-embeddings": 3973.1895, "encoder_q-layer.0": 3391.1772, "encoder_q-layer.1": 3178.7942, "encoder_q-layer.10": 2687.5259, "encoder_q-layer.11": 5727.3091, "encoder_q-layer.2": 3108.2666, "encoder_q-layer.3": 2972.6841, "encoder_q-layer.4": 2840.3005, "encoder_q-layer.5": 2295.8262, "encoder_q-layer.6": 2209.0813, "encoder_q-layer.7": 2229.2361, "encoder_q-layer.8": 2387.8643, "encoder_q-layer.9": 2168.4343, "epoch": 0.08, "inbatch_neg_score": 0.571, "inbatch_pos_score": 1.166, "learning_rate": 4.05e-05, "loss": 3.9173, "norm_diff": 0.2086, "norm_loss": 0.0, "num_token_doc": 66.7761, "num_token_overlap": 15.7873, "num_token_query": 42.1356, "num_token_union": 68.3795, "num_word_context": 202.2033, "num_word_doc": 49.809, "num_word_query": 31.8361, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4730.5402, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5649, "query_norm": 1.8884, "queue_k_norm": 1.6745, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1356, "sent_len_1": 66.7761, "sent_len_max_0": 127.995, "sent_len_max_1": 189.4325, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.9239, "doc_norm": 1.6672, "encoder_q-embeddings": 3913.9343, "encoder_q-layer.0": 3018.957, "encoder_q-layer.1": 2905.2146, "encoder_q-layer.10": 3291.7373, "encoder_q-layer.11": 6450.6455, "encoder_q-layer.2": 3291.8081, "encoder_q-layer.3": 3407.5273, "encoder_q-layer.4": 3276.0535, "encoder_q-layer.5": 2588.0603, "encoder_q-layer.6": 2658.5381, "encoder_q-layer.7": 2463.9358, "encoder_q-layer.8": 2711.5925, "encoder_q-layer.9": 2484.9141, "epoch": 0.08, "inbatch_neg_score": 0.5648, "inbatch_pos_score": 1.1455, "learning_rate": 4.1e-05, "loss": 3.9239, "norm_diff": 0.2238, "norm_loss": 0.0, "num_token_doc": 66.5621, "num_token_overlap": 15.8195, "num_token_query": 42.3822, "num_token_union": 68.4326, "num_word_context": 202.2599, "num_word_doc": 49.6735, "num_word_query": 32.0199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5040.2215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5566, "query_norm": 1.8909, "queue_k_norm": 1.6683, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3822, "sent_len_1": 66.5621, "sent_len_max_0": 128.0, "sent_len_max_1": 189.365, "stdk": 0.0484, "stdq": 0.0469, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8977, "doc_norm": 1.6532, "encoder_q-embeddings": 7628.2109, "encoder_q-layer.0": 5415.6401, "encoder_q-layer.1": 5230.8848, "encoder_q-layer.10": 5779.2769, "encoder_q-layer.11": 11336.8086, "encoder_q-layer.2": 5850.186, "encoder_q-layer.3": 6013.8413, "encoder_q-layer.4": 5930.2109, "encoder_q-layer.5": 5028.0669, "encoder_q-layer.6": 4856.001, "encoder_q-layer.7": 4430.2051, "encoder_q-layer.8": 4449.293, "encoder_q-layer.9": 4194.1655, "epoch": 0.08, "inbatch_neg_score": 0.5474, "inbatch_pos_score": 1.1465, "learning_rate": 4.15e-05, "loss": 3.8977, "norm_diff": 0.2123, "norm_loss": 0.0, "num_token_doc": 66.9351, "num_token_overlap": 15.8916, "num_token_query": 42.612, "num_token_union": 68.676, "num_word_context": 202.7706, "num_word_doc": 49.9439, "num_word_query": 32.1806, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9114.604, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5425, "query_norm": 1.8656, "queue_k_norm": 1.659, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.612, "sent_len_1": 66.9351, "sent_len_max_0": 127.9775, "sent_len_max_1": 189.39, "stdk": 0.0479, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9076, "doc_norm": 1.6508, "encoder_q-embeddings": 14891.8379, "encoder_q-layer.0": 11445.3271, "encoder_q-layer.1": 10745.0674, "encoder_q-layer.10": 2709.6709, "encoder_q-layer.11": 5684.0352, "encoder_q-layer.2": 11059.8467, "encoder_q-layer.3": 10581.4785, "encoder_q-layer.4": 9587.2744, "encoder_q-layer.5": 8037.376, "encoder_q-layer.6": 8406.498, "encoder_q-layer.7": 5238.8467, "encoder_q-layer.8": 2966.5598, "encoder_q-layer.9": 2403.8447, "epoch": 0.08, "inbatch_neg_score": 0.5341, "inbatch_pos_score": 1.1133, "learning_rate": 4.2e-05, "loss": 3.9076, "norm_diff": 0.3013, "norm_loss": 0.0, "num_token_doc": 66.4166, "num_token_overlap": 15.7566, "num_token_query": 42.1906, "num_token_union": 68.2625, "num_word_context": 201.8665, "num_word_doc": 49.5924, "num_word_query": 31.8917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13855.0712, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5254, "query_norm": 1.952, "queue_k_norm": 1.6504, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1906, "sent_len_1": 66.4166, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6087, "stdk": 0.0481, "stdq": 0.0475, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.8461, "doc_norm": 1.65, "encoder_q-embeddings": 1897.8337, "encoder_q-layer.0": 1381.1896, "encoder_q-layer.1": 1406.7015, "encoder_q-layer.10": 2192.1624, "encoder_q-layer.11": 4893.582, "encoder_q-layer.2": 1525.3241, "encoder_q-layer.3": 1534.4785, "encoder_q-layer.4": 1523.3303, "encoder_q-layer.5": 1475.6324, "encoder_q-layer.6": 1607.3479, "encoder_q-layer.7": 1809.538, "encoder_q-layer.8": 2174.3501, "encoder_q-layer.9": 1846.8379, "epoch": 0.08, "inbatch_neg_score": 0.5501, "inbatch_pos_score": 1.1602, "learning_rate": 4.25e-05, "loss": 3.8461, "norm_diff": 0.3699, "norm_loss": 0.0, "num_token_doc": 66.9049, "num_token_overlap": 15.9013, "num_token_query": 42.4302, "num_token_union": 68.5587, "num_word_context": 202.7325, "num_word_doc": 49.9482, "num_word_query": 32.0631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3211.4474, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5396, "query_norm": 2.0198, "queue_k_norm": 1.6451, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4302, "sent_len_1": 66.9049, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.315, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.8407, "doc_norm": 1.6351, "encoder_q-embeddings": 1838.7858, "encoder_q-layer.0": 1276.8746, "encoder_q-layer.1": 1330.5538, "encoder_q-layer.10": 2340.1831, "encoder_q-layer.11": 5045.9507, "encoder_q-layer.2": 1396.4297, "encoder_q-layer.3": 1393.9105, "encoder_q-layer.4": 1489.8556, "encoder_q-layer.5": 1474.321, "encoder_q-layer.6": 1676.1499, "encoder_q-layer.7": 1729.1273, "encoder_q-layer.8": 2053.1157, "encoder_q-layer.9": 1886.0234, "epoch": 0.08, "inbatch_neg_score": 0.5868, "inbatch_pos_score": 1.1943, "learning_rate": 4.3e-05, "loss": 3.8407, "norm_diff": 0.4432, "norm_loss": 0.0, "num_token_doc": 66.762, "num_token_overlap": 15.787, "num_token_query": 42.3806, "num_token_union": 68.5458, "num_word_context": 202.1888, "num_word_doc": 49.7985, "num_word_query": 31.9945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3192.1578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5771, "query_norm": 2.0783, "queue_k_norm": 1.6412, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3806, "sent_len_1": 66.762, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.585, "stdk": 0.048, "stdq": 0.0461, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.8231, "doc_norm": 1.6431, "encoder_q-embeddings": 2851.1042, "encoder_q-layer.0": 1906.3413, "encoder_q-layer.1": 1810.7194, "encoder_q-layer.10": 2736.3984, "encoder_q-layer.11": 5870.0542, "encoder_q-layer.2": 1958.6102, "encoder_q-layer.3": 1892.5847, "encoder_q-layer.4": 1962.4117, "encoder_q-layer.5": 1934.6919, "encoder_q-layer.6": 2213.7576, "encoder_q-layer.7": 2293.9048, "encoder_q-layer.8": 2569.6125, "encoder_q-layer.9": 2396.8237, "epoch": 0.08, "inbatch_neg_score": 0.6578, "inbatch_pos_score": 1.249, "learning_rate": 4.35e-05, "loss": 3.8231, "norm_diff": 0.4367, "norm_loss": 0.0, "num_token_doc": 66.6919, "num_token_overlap": 15.8092, "num_token_query": 42.2846, "num_token_union": 68.3968, "num_word_context": 202.2413, "num_word_doc": 49.7411, "num_word_query": 31.901, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3895.0309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6489, "query_norm": 2.0799, "queue_k_norm": 1.6357, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2846, "sent_len_1": 66.6919, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5238, "stdk": 0.0484, "stdq": 0.0462, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8364, "doc_norm": 1.6334, "encoder_q-embeddings": 1678.6987, "encoder_q-layer.0": 1149.4941, "encoder_q-layer.1": 1179.0723, "encoder_q-layer.10": 2386.1917, "encoder_q-layer.11": 5003.3794, "encoder_q-layer.2": 1298.0957, "encoder_q-layer.3": 1377.0768, "encoder_q-layer.4": 1436.8022, "encoder_q-layer.5": 1418.5789, "encoder_q-layer.6": 1659.0817, "encoder_q-layer.7": 1791.5594, "encoder_q-layer.8": 2021.1727, "encoder_q-layer.9": 1922.9086, "epoch": 0.09, "inbatch_neg_score": 0.6434, "inbatch_pos_score": 1.2314, "learning_rate": 4.4000000000000006e-05, "loss": 3.8364, "norm_diff": 0.3344, "norm_loss": 0.0, "num_token_doc": 66.6504, "num_token_overlap": 15.855, "num_token_query": 42.3956, "num_token_union": 68.3952, "num_word_context": 202.0697, "num_word_doc": 49.7172, "num_word_query": 32.0231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3116.4209, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6343, "query_norm": 1.9678, "queue_k_norm": 1.634, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3956, "sent_len_1": 66.6504, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.7175, "stdk": 0.0479, "stdq": 0.046, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8335, "doc_norm": 1.6311, "encoder_q-embeddings": 1592.637, "encoder_q-layer.0": 1099.4357, "encoder_q-layer.1": 1194.1171, "encoder_q-layer.10": 2269.4983, "encoder_q-layer.11": 5202.3662, "encoder_q-layer.2": 1310.7164, "encoder_q-layer.3": 1404.8197, "encoder_q-layer.4": 1438.301, "encoder_q-layer.5": 1545.5659, "encoder_q-layer.6": 1844.3179, "encoder_q-layer.7": 1926.9281, "encoder_q-layer.8": 2246.0476, "encoder_q-layer.9": 1972.9286, "epoch": 0.09, "inbatch_neg_score": 0.6258, "inbatch_pos_score": 1.2188, "learning_rate": 4.4500000000000004e-05, "loss": 3.8335, "norm_diff": 0.2285, "norm_loss": 0.0, "num_token_doc": 66.7535, "num_token_overlap": 15.8456, "num_token_query": 42.312, "num_token_union": 68.432, "num_word_context": 201.9486, "num_word_doc": 49.7906, "num_word_query": 31.9214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3175.1964, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6196, "query_norm": 1.8595, "queue_k_norm": 1.6349, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.312, "sent_len_1": 66.7535, "sent_len_max_0": 127.985, "sent_len_max_1": 189.97, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8233, "doc_norm": 1.6306, "encoder_q-embeddings": 2409.344, "encoder_q-layer.0": 1827.3499, "encoder_q-layer.1": 2000.1831, "encoder_q-layer.10": 2752.1536, "encoder_q-layer.11": 5931.5015, "encoder_q-layer.2": 2149.511, "encoder_q-layer.3": 2102.9517, "encoder_q-layer.4": 2169.4182, "encoder_q-layer.5": 1951.5819, "encoder_q-layer.6": 1843.5642, "encoder_q-layer.7": 1814.1978, "encoder_q-layer.8": 2174.9985, "encoder_q-layer.9": 2029.7711, "epoch": 0.09, "inbatch_neg_score": 0.613, "inbatch_pos_score": 1.2061, "learning_rate": 4.5e-05, "loss": 3.8233, "norm_diff": 0.2189, "norm_loss": 0.0, "num_token_doc": 66.6662, "num_token_overlap": 15.7597, "num_token_query": 42.1871, "num_token_union": 68.3792, "num_word_context": 202.2299, "num_word_doc": 49.7105, "num_word_query": 31.8275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3832.0845, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6055, "query_norm": 1.8495, "queue_k_norm": 1.6337, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1871, "sent_len_1": 66.6662, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5513, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.7741, "doc_norm": 1.6246, "encoder_q-embeddings": 1692.438, "encoder_q-layer.0": 1155.0004, "encoder_q-layer.1": 1279.3093, "encoder_q-layer.10": 2146.0671, "encoder_q-layer.11": 4908.7803, "encoder_q-layer.2": 1426.3527, "encoder_q-layer.3": 1465.8412, "encoder_q-layer.4": 1516.6301, "encoder_q-layer.5": 1514.9316, "encoder_q-layer.6": 1630.6322, "encoder_q-layer.7": 1815.165, "encoder_q-layer.8": 2142.1594, "encoder_q-layer.9": 1872.3116, "epoch": 0.09, "inbatch_neg_score": 0.554, "inbatch_pos_score": 1.1436, "learning_rate": 4.55e-05, "loss": 3.7741, "norm_diff": 0.2319, "norm_loss": 0.0, "num_token_doc": 66.7751, "num_token_overlap": 15.8426, "num_token_query": 42.5537, "num_token_union": 68.6255, "num_word_context": 202.6877, "num_word_doc": 49.8442, "num_word_query": 32.136, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3074.6612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5469, "query_norm": 1.8566, "queue_k_norm": 1.6286, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5537, "sent_len_1": 66.7751, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6987, "stdk": 0.0479, "stdq": 0.0455, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.7803, "doc_norm": 1.6288, "encoder_q-embeddings": 1617.0859, "encoder_q-layer.0": 1107.5592, "encoder_q-layer.1": 1177.2965, "encoder_q-layer.10": 2257.1279, "encoder_q-layer.11": 4805.8657, "encoder_q-layer.2": 1353.714, "encoder_q-layer.3": 1480.1215, "encoder_q-layer.4": 1559.3702, "encoder_q-layer.5": 1547.217, "encoder_q-layer.6": 1621.3195, "encoder_q-layer.7": 1678.5663, "encoder_q-layer.8": 1981.783, "encoder_q-layer.9": 1755.7471, "epoch": 0.09, "inbatch_neg_score": 0.5577, "inbatch_pos_score": 1.1309, "learning_rate": 4.600000000000001e-05, "loss": 3.7803, "norm_diff": 0.3031, "norm_loss": 0.0, "num_token_doc": 66.8071, "num_token_overlap": 15.822, "num_token_query": 42.4189, "num_token_union": 68.542, "num_word_context": 202.4628, "num_word_doc": 49.8466, "num_word_query": 32.0395, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3057.5209, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5464, "query_norm": 1.9319, "queue_k_norm": 1.6236, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4189, "sent_len_1": 66.8071, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.5525, "stdk": 0.0482, "stdq": 0.0445, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.7579, "doc_norm": 1.6261, "encoder_q-embeddings": 2057.5054, "encoder_q-layer.0": 1371.5819, "encoder_q-layer.1": 1412.1417, "encoder_q-layer.10": 3320.7854, "encoder_q-layer.11": 7452.0513, "encoder_q-layer.2": 1613.2302, "encoder_q-layer.3": 1747.1549, "encoder_q-layer.4": 1819.7592, "encoder_q-layer.5": 1797.9692, "encoder_q-layer.6": 1835.6348, "encoder_q-layer.7": 1938.996, "encoder_q-layer.8": 2149.3066, "encoder_q-layer.9": 2117.5044, "epoch": 0.09, "inbatch_neg_score": 0.5275, "inbatch_pos_score": 1.1416, "learning_rate": 4.6500000000000005e-05, "loss": 3.7579, "norm_diff": 0.3235, "norm_loss": 0.0, "num_token_doc": 66.8772, "num_token_overlap": 15.9228, "num_token_query": 42.4879, "num_token_union": 68.56, "num_word_context": 202.142, "num_word_doc": 49.9562, "num_word_query": 32.0978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4196.3345, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.522, "query_norm": 1.9496, "queue_k_norm": 1.6216, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4879, "sent_len_1": 66.8772, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3887, "stdk": 0.0482, "stdq": 0.0453, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.7305, "doc_norm": 1.6196, "encoder_q-embeddings": 2213.2092, "encoder_q-layer.0": 1688.0106, "encoder_q-layer.1": 1678.738, "encoder_q-layer.10": 2197.1963, "encoder_q-layer.11": 4640.7676, "encoder_q-layer.2": 1858.1106, "encoder_q-layer.3": 1886.1858, "encoder_q-layer.4": 1773.9873, "encoder_q-layer.5": 1695.2791, "encoder_q-layer.6": 1654.9509, "encoder_q-layer.7": 1693.4694, "encoder_q-layer.8": 1947.016, "encoder_q-layer.9": 1726.4698, "epoch": 0.09, "inbatch_neg_score": 0.5667, "inbatch_pos_score": 1.168, "learning_rate": 4.7e-05, "loss": 3.7305, "norm_diff": 0.3297, "norm_loss": 0.0, "num_token_doc": 66.9753, "num_token_overlap": 15.8261, "num_token_query": 42.3781, "num_token_union": 68.6463, "num_word_context": 202.6764, "num_word_doc": 49.9914, "num_word_query": 32.0187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3284.9192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5566, "query_norm": 1.9494, "queue_k_norm": 1.617, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3781, "sent_len_1": 66.9753, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.8313, "stdk": 0.0481, "stdq": 0.0455, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.7474, "doc_norm": 1.6139, "encoder_q-embeddings": 3441.011, "encoder_q-layer.0": 2736.7949, "encoder_q-layer.1": 3177.1357, "encoder_q-layer.10": 2020.9305, "encoder_q-layer.11": 4721.9492, "encoder_q-layer.2": 3663.5186, "encoder_q-layer.3": 4063.5466, "encoder_q-layer.4": 4354.4028, "encoder_q-layer.5": 3694.23, "encoder_q-layer.6": 3564.5132, "encoder_q-layer.7": 2494.4048, "encoder_q-layer.8": 2009.2863, "encoder_q-layer.9": 1709.4719, "epoch": 0.09, "inbatch_neg_score": 0.5826, "inbatch_pos_score": 1.1699, "learning_rate": 4.75e-05, "loss": 3.7474, "norm_diff": 0.2553, "norm_loss": 0.0, "num_token_doc": 66.7526, "num_token_overlap": 15.7096, "num_token_query": 41.994, "num_token_union": 68.3237, "num_word_context": 202.1047, "num_word_doc": 49.8, "num_word_query": 31.6947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4926.4189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5747, "query_norm": 1.8692, "queue_k_norm": 1.6118, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 41.994, "sent_len_1": 66.7526, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9875, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.7361, "doc_norm": 1.6077, "encoder_q-embeddings": 1594.8082, "encoder_q-layer.0": 1117.4308, "encoder_q-layer.1": 1217.5167, "encoder_q-layer.10": 3355.0742, "encoder_q-layer.11": 7315.6328, "encoder_q-layer.2": 1428.4796, "encoder_q-layer.3": 1522.2258, "encoder_q-layer.4": 1697.6093, "encoder_q-layer.5": 1695.7607, "encoder_q-layer.6": 1864.5831, "encoder_q-layer.7": 2067.908, "encoder_q-layer.8": 2240.3516, "encoder_q-layer.9": 2180.5161, "epoch": 0.09, "inbatch_neg_score": 0.5917, "inbatch_pos_score": 1.1826, "learning_rate": 4.8e-05, "loss": 3.7361, "norm_diff": 0.1841, "norm_loss": 0.0, "num_token_doc": 66.8875, "num_token_overlap": 15.821, "num_token_query": 42.3874, "num_token_union": 68.5652, "num_word_context": 202.4151, "num_word_doc": 49.9178, "num_word_query": 32.0128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3928.6543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5859, "query_norm": 1.7918, "queue_k_norm": 1.6113, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3874, "sent_len_1": 66.8875, "sent_len_max_0": 128.0, "sent_len_max_1": 190.26, "stdk": 0.0478, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.7686, "doc_norm": 1.5935, "encoder_q-embeddings": 1778.2916, "encoder_q-layer.0": 1280.8059, "encoder_q-layer.1": 1346.5647, "encoder_q-layer.10": 2283.499, "encoder_q-layer.11": 5680.0454, "encoder_q-layer.2": 1531.7302, "encoder_q-layer.3": 1546.7723, "encoder_q-layer.4": 1625.3726, "encoder_q-layer.5": 1676.3004, "encoder_q-layer.6": 1867.6982, "encoder_q-layer.7": 1806.6865, "encoder_q-layer.8": 1989.6768, "encoder_q-layer.9": 1735.0798, "epoch": 0.09, "inbatch_neg_score": 0.5636, "inbatch_pos_score": 1.1377, "learning_rate": 4.85e-05, "loss": 3.7686, "norm_diff": 0.1382, "norm_loss": 0.0, "num_token_doc": 66.8005, "num_token_overlap": 15.7936, "num_token_query": 42.2817, "num_token_union": 68.4855, "num_word_context": 202.4027, "num_word_doc": 49.8814, "num_word_query": 31.9561, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3298.7981, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5557, "query_norm": 1.7317, "queue_k_norm": 1.6032, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2817, "sent_len_1": 66.8005, "sent_len_max_0": 127.9887, "sent_len_max_1": 187.9137, "stdk": 0.0474, "stdq": 0.0444, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.7295, "doc_norm": 1.5895, "encoder_q-embeddings": 15625.9062, "encoder_q-layer.0": 11788.6631, "encoder_q-layer.1": 10528.2686, "encoder_q-layer.10": 2181.9937, "encoder_q-layer.11": 4623.1431, "encoder_q-layer.2": 10973.875, "encoder_q-layer.3": 11160.3838, "encoder_q-layer.4": 8941.8027, "encoder_q-layer.5": 8345.2432, "encoder_q-layer.6": 9286.6182, "encoder_q-layer.7": 4571.9595, "encoder_q-layer.8": 2187.9133, "encoder_q-layer.9": 1738.1653, "epoch": 0.1, "inbatch_neg_score": 0.5581, "inbatch_pos_score": 1.1445, "learning_rate": 4.9e-05, "loss": 3.7295, "norm_diff": 0.148, "norm_loss": 0.0, "num_token_doc": 66.6533, "num_token_overlap": 15.863, "num_token_query": 42.5004, "num_token_union": 68.494, "num_word_context": 202.2029, "num_word_doc": 49.7462, "num_word_query": 32.0917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14042.2906, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5513, "query_norm": 1.7375, "queue_k_norm": 1.6011, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5004, "sent_len_1": 66.6533, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.25, "stdk": 0.0474, "stdq": 0.0445, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.719, "doc_norm": 1.6031, "encoder_q-embeddings": 1711.8026, "encoder_q-layer.0": 1253.8215, "encoder_q-layer.1": 1294.7051, "encoder_q-layer.10": 2127.2292, "encoder_q-layer.11": 4563.6953, "encoder_q-layer.2": 1423.5973, "encoder_q-layer.3": 1460.8055, "encoder_q-layer.4": 1529.6897, "encoder_q-layer.5": 1504.075, "encoder_q-layer.6": 1654.1797, "encoder_q-layer.7": 1608.087, "encoder_q-layer.8": 1790.1974, "encoder_q-layer.9": 1702.7017, "epoch": 0.1, "inbatch_neg_score": 0.5497, "inbatch_pos_score": 1.1523, "learning_rate": 4.9500000000000004e-05, "loss": 3.719, "norm_diff": 0.1912, "norm_loss": 0.0, "num_token_doc": 67.1128, "num_token_overlap": 15.8828, "num_token_query": 42.287, "num_token_union": 68.5973, "num_word_context": 202.5724, "num_word_doc": 50.0292, "num_word_query": 31.9317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2863.8751, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5435, "query_norm": 1.7943, "queue_k_norm": 1.596, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.287, "sent_len_1": 67.1128, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.7525, "stdk": 0.0481, "stdq": 0.0454, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.7015, "doc_norm": 1.5907, "encoder_q-embeddings": 2488.5935, "encoder_q-layer.0": 1828.3402, "encoder_q-layer.1": 2022.577, "encoder_q-layer.10": 2146.7078, "encoder_q-layer.11": 5297.3901, "encoder_q-layer.2": 2301.3594, "encoder_q-layer.3": 2314.8003, "encoder_q-layer.4": 2560.0361, "encoder_q-layer.5": 2481.8687, "encoder_q-layer.6": 2766.4182, "encoder_q-layer.7": 2864.9094, "encoder_q-layer.8": 2437.4722, "encoder_q-layer.9": 1880.8311, "epoch": 0.1, "inbatch_neg_score": 0.539, "inbatch_pos_score": 1.1221, "learning_rate": 5e-05, "loss": 3.7015, "norm_diff": 0.1572, "norm_loss": 0.0, "num_token_doc": 66.7059, "num_token_overlap": 15.8478, "num_token_query": 42.3196, "num_token_union": 68.4168, "num_word_context": 202.0535, "num_word_doc": 49.8049, "num_word_query": 31.9678, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3932.8058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5317, "query_norm": 1.7479, "queue_k_norm": 1.593, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3196, "sent_len_1": 66.7059, "sent_len_max_0": 127.985, "sent_len_max_1": 189.3237, "stdk": 0.0475, "stdq": 0.0445, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 27.1171, "dev_samples_per_second": 2.36, "dev_steps_per_second": 0.037, "epoch": 0.1, "step": 10000, "test_accuracy": 91.19873046875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5107851028442383, "test_doc_norm": 1.5158181190490723, "test_inbatch_neg_score": 0.8391999006271362, "test_inbatch_pos_score": 1.606733798980713, "test_loss": 0.5107851028442383, "test_loss_align": 1.9982426166534424, "test_loss_unif": 3.469186782836914, "test_loss_unif_q@queue": 3.469186782836914, "test_norm_diff": 0.18560142815113068, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5162638425827026, "test_query_norm": 1.701419472694397, "test_queue_k_norm": 1.5926156044006348, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03956793248653412, "test_stdq": 0.03859269246459007, "test_stdqueue_k": 0.04780745133757591, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.1171, "dev_samples_per_second": 2.36, "dev_steps_per_second": 0.037, "epoch": 0.1, "eval_beir-arguana_ndcg@10": 0.33349, "eval_beir-arguana_recall@10": 0.58179, "eval_beir-arguana_recall@100": 0.90683, "eval_beir-arguana_recall@20": 0.71479, "eval_beir-avg_ndcg@10": 0.23804124999999998, "eval_beir-avg_recall@10": 0.29646475, "eval_beir-avg_recall@100": 0.5075913333333333, "eval_beir-avg_recall@20": 0.3653301666666667, "eval_beir-cqadupstack_ndcg@10": 0.16995249999999998, "eval_beir-cqadupstack_recall@10": 0.2400175, "eval_beir-cqadupstack_recall@100": 0.4559333333333333, "eval_beir-cqadupstack_recall@20": 0.2979316666666667, "eval_beir-fiqa_ndcg@10": 0.15344, "eval_beir-fiqa_recall@10": 0.19665, "eval_beir-fiqa_recall@100": 0.43651, "eval_beir-fiqa_recall@20": 0.26469, "eval_beir-nfcorpus_ndcg@10": 0.22349, "eval_beir-nfcorpus_recall@10": 0.10516, "eval_beir-nfcorpus_recall@100": 0.22795, "eval_beir-nfcorpus_recall@20": 0.12803, "eval_beir-nq_ndcg@10": 0.13331, "eval_beir-nq_recall@10": 0.22371, "eval_beir-nq_recall@100": 0.5506, "eval_beir-nq_recall@20": 0.32433, "eval_beir-quora_ndcg@10": 0.3092, "eval_beir-quora_recall@10": 0.42055, "eval_beir-quora_recall@100": 0.69742, "eval_beir-quora_recall@20": 0.51263, "eval_beir-scidocs_ndcg@10": 0.10741, "eval_beir-scidocs_recall@10": 0.11238, "eval_beir-scidocs_recall@100": 0.28987, "eval_beir-scidocs_recall@20": 0.16192, "eval_beir-scifact_ndcg@10": 0.52987, "eval_beir-scifact_recall@10": 0.67417, "eval_beir-scifact_recall@100": 0.86967, "eval_beir-scifact_recall@20": 0.76944, "eval_beir-trec-covid_ndcg@10": 0.2865, "eval_beir-trec-covid_recall@10": 0.314, "eval_beir-trec-covid_recall@100": 0.2682, "eval_beir-trec-covid_recall@20": 0.321, "eval_beir-webis-touche2020_ndcg@10": 0.13375, "eval_beir-webis-touche2020_recall@10": 0.09622, "eval_beir-webis-touche2020_recall@100": 0.37293, "eval_beir-webis-touche2020_recall@20": 0.15854, "eval_senteval-avg_sts": 0.6990567044678255, "eval_senteval-sickr_spearman": 0.660047766792775, "eval_senteval-stsb_spearman": 0.7380656421428761, "step": 10000, "test_accuracy": 91.19873046875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5107851028442383, "test_doc_norm": 1.5158181190490723, "test_inbatch_neg_score": 0.8391999006271362, "test_inbatch_pos_score": 1.606733798980713, "test_loss": 0.5107851028442383, "test_loss_align": 1.9982426166534424, "test_loss_unif": 3.469186782836914, "test_loss_unif_q@queue": 3.469186782836914, "test_norm_diff": 0.18560142815113068, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5162638425827026, "test_query_norm": 1.701419472694397, "test_queue_k_norm": 1.5926156044006348, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03956793248653412, "test_stdq": 0.03859269246459007, "test_stdqueue_k": 0.04780745133757591, "test_stdqueue_q": 0.0 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.7105, "doc_norm": 1.5841, "encoder_q-embeddings": 4175.4155, "encoder_q-layer.0": 3050.7666, "encoder_q-layer.1": 3124.9233, "encoder_q-layer.10": 1774.2985, "encoder_q-layer.11": 4970.8511, "encoder_q-layer.2": 2788.4749, "encoder_q-layer.3": 2839.9595, "encoder_q-layer.4": 2537.1824, "encoder_q-layer.5": 2348.8501, "encoder_q-layer.6": 2820.1643, "encoder_q-layer.7": 2197.5879, "encoder_q-layer.8": 1714.1166, "encoder_q-layer.9": 1520.8208, "epoch": 0.1, "inbatch_neg_score": 0.5382, "inbatch_pos_score": 1.125, "learning_rate": 4.994444444444445e-05, "loss": 3.7105, "norm_diff": 0.1047, "norm_loss": 0.0, "num_token_doc": 66.7054, "num_token_overlap": 15.8094, "num_token_query": 42.313, "num_token_union": 68.4551, "num_word_context": 202.0564, "num_word_doc": 49.7691, "num_word_query": 31.969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4394.185, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5312, "query_norm": 1.6888, "queue_k_norm": 1.5883, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.313, "sent_len_1": 66.7054, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1438, "stdk": 0.0475, "stdq": 0.0439, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.6754, "doc_norm": 1.5889, "encoder_q-embeddings": 1523.5905, "encoder_q-layer.0": 1080.5884, "encoder_q-layer.1": 1168.2668, "encoder_q-layer.10": 1836.371, "encoder_q-layer.11": 4114.8594, "encoder_q-layer.2": 1293.7109, "encoder_q-layer.3": 1345.9318, "encoder_q-layer.4": 1457.7764, "encoder_q-layer.5": 1520.4768, "encoder_q-layer.6": 1747.6918, "encoder_q-layer.7": 2061.176, "encoder_q-layer.8": 1956.5194, "encoder_q-layer.9": 1595.4849, "epoch": 0.1, "inbatch_neg_score": 0.526, "inbatch_pos_score": 1.1338, "learning_rate": 4.9888888888888894e-05, "loss": 3.6754, "norm_diff": 0.1037, "norm_loss": 0.0, "num_token_doc": 66.9794, "num_token_overlap": 15.9512, "num_token_query": 42.6357, "num_token_union": 68.6701, "num_word_context": 202.6132, "num_word_doc": 49.9256, "num_word_query": 32.2127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2805.2212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.522, "query_norm": 1.6925, "queue_k_norm": 1.5863, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.6357, "sent_len_1": 66.9794, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9575, "stdk": 0.0478, "stdq": 0.0444, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.7143, "doc_norm": 1.5824, "encoder_q-embeddings": 2753.8523, "encoder_q-layer.0": 2021.1886, "encoder_q-layer.1": 2377.2615, "encoder_q-layer.10": 1988.151, "encoder_q-layer.11": 4984.3623, "encoder_q-layer.2": 2471.3608, "encoder_q-layer.3": 2444.0649, "encoder_q-layer.4": 2371.5144, "encoder_q-layer.5": 2284.5103, "encoder_q-layer.6": 2463.5801, "encoder_q-layer.7": 2347.9958, "encoder_q-layer.8": 2513.6477, "encoder_q-layer.9": 2027.5648, "epoch": 0.1, "inbatch_neg_score": 0.5148, "inbatch_pos_score": 1.1191, "learning_rate": 4.9833333333333336e-05, "loss": 3.7143, "norm_diff": 0.036, "norm_loss": 0.0, "num_token_doc": 66.6085, "num_token_overlap": 15.7965, "num_token_query": 42.4869, "num_token_union": 68.5507, "num_word_context": 202.6246, "num_word_doc": 49.7089, "num_word_query": 32.1022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3843.0839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5107, "query_norm": 1.6184, "queue_k_norm": 1.5822, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4869, "sent_len_1": 66.6085, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.5025, "stdk": 0.0478, "stdq": 0.043, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.6928, "doc_norm": 1.5816, "encoder_q-embeddings": 3482.0225, "encoder_q-layer.0": 2393.1885, "encoder_q-layer.1": 2606.0642, "encoder_q-layer.10": 4092.6418, "encoder_q-layer.11": 11653.5332, "encoder_q-layer.2": 3007.675, "encoder_q-layer.3": 3341.8481, "encoder_q-layer.4": 3722.1892, "encoder_q-layer.5": 3666.2556, "encoder_q-layer.6": 3879.0107, "encoder_q-layer.7": 4076.5518, "encoder_q-layer.8": 4645.1528, "encoder_q-layer.9": 3628.6223, "epoch": 0.1, "inbatch_neg_score": 0.514, "inbatch_pos_score": 1.1162, "learning_rate": 4.977777777777778e-05, "loss": 3.6928, "norm_diff": 0.0857, "norm_loss": 0.0, "num_token_doc": 66.6351, "num_token_overlap": 15.7702, "num_token_query": 42.2498, "num_token_union": 68.3586, "num_word_context": 202.2373, "num_word_doc": 49.7491, "num_word_query": 31.9239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6850.6969, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5078, "query_norm": 1.6673, "queue_k_norm": 1.577, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2498, "sent_len_1": 66.6351, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.0625, "stdk": 0.0477, "stdq": 0.0448, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.6698, "doc_norm": 1.5708, "encoder_q-embeddings": 13042.6553, "encoder_q-layer.0": 9684.8877, "encoder_q-layer.1": 8061.7437, "encoder_q-layer.10": 4229.2173, "encoder_q-layer.11": 9666.1895, "encoder_q-layer.2": 9009.9512, "encoder_q-layer.3": 9186.6787, "encoder_q-layer.4": 12382.6924, "encoder_q-layer.5": 11488.2812, "encoder_q-layer.6": 14779.0146, "encoder_q-layer.7": 20441.2871, "encoder_q-layer.8": 32397.5234, "encoder_q-layer.9": 18814.4219, "epoch": 0.1, "inbatch_neg_score": 0.5281, "inbatch_pos_score": 1.1377, "learning_rate": 4.972222222222223e-05, "loss": 3.6698, "norm_diff": 0.0854, "norm_loss": 0.0, "num_token_doc": 66.622, "num_token_overlap": 15.8385, "num_token_query": 42.3818, "num_token_union": 68.4377, "num_word_context": 202.2355, "num_word_doc": 49.7139, "num_word_query": 32.0045, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22251.1477, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5229, "query_norm": 1.6563, "queue_k_norm": 1.5721, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3818, "sent_len_1": 66.622, "sent_len_max_0": 128.0, "sent_len_max_1": 187.19, "stdk": 0.0475, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.7064, "doc_norm": 1.5739, "encoder_q-embeddings": 3970.9734, "encoder_q-layer.0": 3030.4119, "encoder_q-layer.1": 3114.0024, "encoder_q-layer.10": 2185.9819, "encoder_q-layer.11": 5073.0806, "encoder_q-layer.2": 3878.5339, "encoder_q-layer.3": 4146.8594, "encoder_q-layer.4": 4686.5029, "encoder_q-layer.5": 4636.9883, "encoder_q-layer.6": 4132.7607, "encoder_q-layer.7": 3387.7053, "encoder_q-layer.8": 3118.6816, "encoder_q-layer.9": 2889.7917, "epoch": 0.1, "inbatch_neg_score": 0.5137, "inbatch_pos_score": 1.1016, "learning_rate": 4.966666666666667e-05, "loss": 3.7064, "norm_diff": 0.0301, "norm_loss": 0.0, "num_token_doc": 66.6199, "num_token_overlap": 15.7745, "num_token_query": 42.2454, "num_token_union": 68.3814, "num_word_context": 202.1891, "num_word_doc": 49.752, "num_word_query": 31.9079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5664.0833, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5093, "query_norm": 1.604, "queue_k_norm": 1.5694, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2454, "sent_len_1": 66.6199, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2738, "stdk": 0.0477, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.6945, "doc_norm": 1.5654, "encoder_q-embeddings": 2274.7119, "encoder_q-layer.0": 1670.3693, "encoder_q-layer.1": 1894.302, "encoder_q-layer.10": 2124.9485, "encoder_q-layer.11": 5224.2485, "encoder_q-layer.2": 2207.7786, "encoder_q-layer.3": 2377.0022, "encoder_q-layer.4": 2476.3599, "encoder_q-layer.5": 2518.2688, "encoder_q-layer.6": 2710.6089, "encoder_q-layer.7": 2478.0898, "encoder_q-layer.8": 2488.1016, "encoder_q-layer.9": 1972.2957, "epoch": 0.1, "inbatch_neg_score": 0.5176, "inbatch_pos_score": 1.1289, "learning_rate": 4.961111111111111e-05, "loss": 3.6945, "norm_diff": 0.07, "norm_loss": 0.0, "num_token_doc": 66.621, "num_token_overlap": 15.7734, "num_token_query": 42.3109, "num_token_union": 68.4107, "num_word_context": 202.2742, "num_word_doc": 49.7061, "num_word_query": 31.9476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3768.0155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5156, "query_norm": 1.6354, "queue_k_norm": 1.5685, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3109, "sent_len_1": 66.621, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0475, "stdk": 0.0475, "stdq": 0.0446, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.6686, "doc_norm": 1.5675, "encoder_q-embeddings": 4829.3257, "encoder_q-layer.0": 3396.2073, "encoder_q-layer.1": 3575.8975, "encoder_q-layer.10": 1741.6079, "encoder_q-layer.11": 4148.1885, "encoder_q-layer.2": 4274.6826, "encoder_q-layer.3": 4357.1929, "encoder_q-layer.4": 4620.2334, "encoder_q-layer.5": 4830.5723, "encoder_q-layer.6": 4833.1802, "encoder_q-layer.7": 3781.2993, "encoder_q-layer.8": 3134.825, "encoder_q-layer.9": 1762.3129, "epoch": 0.11, "inbatch_neg_score": 0.5129, "inbatch_pos_score": 1.1182, "learning_rate": 4.955555555555556e-05, "loss": 3.6686, "norm_diff": 0.0491, "norm_loss": 0.0, "num_token_doc": 66.7954, "num_token_overlap": 15.8253, "num_token_query": 42.3746, "num_token_union": 68.5051, "num_word_context": 202.207, "num_word_doc": 49.8319, "num_word_query": 32.0002, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5876.6916, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5107, "query_norm": 1.6166, "queue_k_norm": 1.5668, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3746, "sent_len_1": 66.7954, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.1488, "stdk": 0.0476, "stdq": 0.0434, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.6614, "doc_norm": 1.5711, "encoder_q-embeddings": 2359.9739, "encoder_q-layer.0": 1651.5035, "encoder_q-layer.1": 1927.6177, "encoder_q-layer.10": 1715.9214, "encoder_q-layer.11": 3738.374, "encoder_q-layer.2": 2152.4443, "encoder_q-layer.3": 2342.2771, "encoder_q-layer.4": 2518.9707, "encoder_q-layer.5": 2665.3667, "encoder_q-layer.6": 2669.1584, "encoder_q-layer.7": 2779.6235, "encoder_q-layer.8": 2109.7283, "encoder_q-layer.9": 1577.4257, "epoch": 0.11, "inbatch_neg_score": 0.4928, "inbatch_pos_score": 1.1074, "learning_rate": 4.9500000000000004e-05, "loss": 3.6614, "norm_diff": 0.054, "norm_loss": 0.0, "num_token_doc": 66.9869, "num_token_overlap": 15.7765, "num_token_query": 42.2235, "num_token_union": 68.6137, "num_word_context": 202.5768, "num_word_doc": 49.9817, "num_word_query": 31.9025, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3514.3067, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4907, "query_norm": 1.6252, "queue_k_norm": 1.5662, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2235, "sent_len_1": 66.9869, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.1163, "stdk": 0.0478, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.6592, "doc_norm": 1.5558, "encoder_q-embeddings": 1734.1604, "encoder_q-layer.0": 1252.109, "encoder_q-layer.1": 1387.5939, "encoder_q-layer.10": 1623.2118, "encoder_q-layer.11": 3493.8931, "encoder_q-layer.2": 1577.6296, "encoder_q-layer.3": 1706.6946, "encoder_q-layer.4": 1892.4204, "encoder_q-layer.5": 1896.3154, "encoder_q-layer.6": 1967.3514, "encoder_q-layer.7": 1826.5081, "encoder_q-layer.8": 1856.6986, "encoder_q-layer.9": 1356.6428, "epoch": 0.11, "inbatch_neg_score": 0.4828, "inbatch_pos_score": 1.0752, "learning_rate": 4.9444444444444446e-05, "loss": 3.6592, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.8784, "num_token_overlap": 15.8131, "num_token_query": 42.2453, "num_token_union": 68.4934, "num_word_context": 202.4886, "num_word_doc": 49.8294, "num_word_query": 31.8979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2802.9134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4783, "query_norm": 1.5727, "queue_k_norm": 1.5644, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2453, "sent_len_1": 66.8784, "sent_len_max_0": 127.9887, "sent_len_max_1": 191.23, "stdk": 0.0473, "stdq": 0.0428, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.6681, "doc_norm": 1.5595, "encoder_q-embeddings": 5646.2505, "encoder_q-layer.0": 4331.2651, "encoder_q-layer.1": 4232.3911, "encoder_q-layer.10": 1806.064, "encoder_q-layer.11": 4111.2437, "encoder_q-layer.2": 4442.3022, "encoder_q-layer.3": 4647.9136, "encoder_q-layer.4": 4974.0796, "encoder_q-layer.5": 4868.5479, "encoder_q-layer.6": 4999.2466, "encoder_q-layer.7": 5789.7686, "encoder_q-layer.8": 4200.5762, "encoder_q-layer.9": 2065.1257, "epoch": 0.11, "inbatch_neg_score": 0.4902, "inbatch_pos_score": 1.083, "learning_rate": 4.938888888888889e-05, "loss": 3.6681, "norm_diff": 0.0618, "norm_loss": 0.0, "num_token_doc": 67.0296, "num_token_overlap": 15.8792, "num_token_query": 42.3993, "num_token_union": 68.6249, "num_word_context": 202.8841, "num_word_doc": 50.0021, "num_word_query": 32.039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6635.8786, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4858, "query_norm": 1.6213, "queue_k_norm": 1.5597, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3993, "sent_len_1": 67.0296, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6625, "stdk": 0.0475, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.667, "doc_norm": 1.5592, "encoder_q-embeddings": 1266.5065, "encoder_q-layer.0": 907.6644, "encoder_q-layer.1": 991.1313, "encoder_q-layer.10": 887.3478, "encoder_q-layer.11": 2338.9182, "encoder_q-layer.2": 1136.1355, "encoder_q-layer.3": 1216.7035, "encoder_q-layer.4": 1342.4164, "encoder_q-layer.5": 1478.8728, "encoder_q-layer.6": 1514.0806, "encoder_q-layer.7": 1295.4343, "encoder_q-layer.8": 1008.8596, "encoder_q-layer.9": 840.2563, "epoch": 0.11, "inbatch_neg_score": 0.4563, "inbatch_pos_score": 1.0732, "learning_rate": 4.933333333333334e-05, "loss": 3.667, "norm_diff": 0.0478, "norm_loss": 0.0, "num_token_doc": 66.6541, "num_token_overlap": 15.8231, "num_token_query": 42.3757, "num_token_union": 68.4282, "num_word_context": 202.2068, "num_word_doc": 49.6921, "num_word_query": 32.0147, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1882.9329, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4529, "query_norm": 1.607, "queue_k_norm": 1.5567, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3757, "sent_len_1": 66.6541, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1225, "stdk": 0.0476, "stdq": 0.0443, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.6757, "doc_norm": 1.5477, "encoder_q-embeddings": 10419.3066, "encoder_q-layer.0": 7336.0747, "encoder_q-layer.1": 7810.627, "encoder_q-layer.10": 1015.5597, "encoder_q-layer.11": 2048.48, "encoder_q-layer.2": 8367.5889, "encoder_q-layer.3": 8460.4551, "encoder_q-layer.4": 8359.7471, "encoder_q-layer.5": 10560.9756, "encoder_q-layer.6": 9707.0166, "encoder_q-layer.7": 9511.5186, "encoder_q-layer.8": 4539.9199, "encoder_q-layer.9": 1450.4387, "epoch": 0.11, "inbatch_neg_score": 0.463, "inbatch_pos_score": 1.0645, "learning_rate": 4.927777777777778e-05, "loss": 3.6757, "norm_diff": 0.0687, "norm_loss": 0.0, "num_token_doc": 66.4687, "num_token_overlap": 15.8176, "num_token_query": 42.3207, "num_token_union": 68.3054, "num_word_context": 202.0574, "num_word_doc": 49.605, "num_word_query": 31.9755, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11725.3275, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4597, "query_norm": 1.6164, "queue_k_norm": 1.5511, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3207, "sent_len_1": 66.4687, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.5875, "stdk": 0.0472, "stdq": 0.0445, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6592, "doc_norm": 1.5477, "encoder_q-embeddings": 10623.6113, "encoder_q-layer.0": 8632.4805, "encoder_q-layer.1": 8766.0781, "encoder_q-layer.10": 1014.7219, "encoder_q-layer.11": 2873.0659, "encoder_q-layer.2": 9499.0576, "encoder_q-layer.3": 8938.333, "encoder_q-layer.4": 8371.4004, "encoder_q-layer.5": 7479.813, "encoder_q-layer.6": 8336.0039, "encoder_q-layer.7": 9353.0977, "encoder_q-layer.8": 6046.3306, "encoder_q-layer.9": 1813.3643, "epoch": 0.11, "inbatch_neg_score": 0.4727, "inbatch_pos_score": 1.1016, "learning_rate": 4.922222222222222e-05, "loss": 3.6592, "norm_diff": 0.0967, "norm_loss": 0.0, "num_token_doc": 66.5994, "num_token_overlap": 15.753, "num_token_query": 42.055, "num_token_union": 68.2956, "num_word_context": 201.7922, "num_word_doc": 49.7532, "num_word_query": 31.7562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11749.4873, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.468, "query_norm": 1.6444, "queue_k_norm": 1.5505, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.055, "sent_len_1": 66.5994, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0987, "stdk": 0.0472, "stdq": 0.0454, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.6653, "doc_norm": 1.5525, "encoder_q-embeddings": 4617.7485, "encoder_q-layer.0": 3803.2927, "encoder_q-layer.1": 4539.9458, "encoder_q-layer.10": 873.061, "encoder_q-layer.11": 2101.7158, "encoder_q-layer.2": 5154.9995, "encoder_q-layer.3": 5649.9751, "encoder_q-layer.4": 6396.5962, "encoder_q-layer.5": 7107.5571, "encoder_q-layer.6": 6306.998, "encoder_q-layer.7": 5351.5879, "encoder_q-layer.8": 2862.7493, "encoder_q-layer.9": 1084.2968, "epoch": 0.11, "inbatch_neg_score": 0.4814, "inbatch_pos_score": 1.0869, "learning_rate": 4.9166666666666665e-05, "loss": 3.6653, "norm_diff": 0.0636, "norm_loss": 0.0, "num_token_doc": 66.627, "num_token_overlap": 15.773, "num_token_query": 42.2509, "num_token_union": 68.4084, "num_word_context": 202.3221, "num_word_doc": 49.7225, "num_word_query": 31.8995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7062.9578, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4788, "query_norm": 1.6161, "queue_k_norm": 1.5509, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2509, "sent_len_1": 66.627, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7713, "stdk": 0.0476, "stdq": 0.0437, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.6564, "doc_norm": 1.5503, "encoder_q-embeddings": 1065.5746, "encoder_q-layer.0": 721.3995, "encoder_q-layer.1": 845.5465, "encoder_q-layer.10": 954.9924, "encoder_q-layer.11": 2060.657, "encoder_q-layer.2": 995.6406, "encoder_q-layer.3": 1090.8807, "encoder_q-layer.4": 1204.3434, "encoder_q-layer.5": 1257.4198, "encoder_q-layer.6": 1323.8528, "encoder_q-layer.7": 1175.5392, "encoder_q-layer.8": 1003.16, "encoder_q-layer.9": 887.5156, "epoch": 0.11, "inbatch_neg_score": 0.4631, "inbatch_pos_score": 1.0352, "learning_rate": 4.9111111111111114e-05, "loss": 3.6564, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.8807, "num_token_overlap": 15.781, "num_token_query": 42.2731, "num_token_union": 68.5828, "num_word_context": 202.6839, "num_word_doc": 49.9797, "num_word_query": 31.9218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1693.49, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4607, "query_norm": 1.5639, "queue_k_norm": 1.5487, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2731, "sent_len_1": 66.8807, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8113, "stdk": 0.0476, "stdq": 0.0427, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.6398, "doc_norm": 1.552, "encoder_q-embeddings": 2520.5894, "encoder_q-layer.0": 1998.634, "encoder_q-layer.1": 2049.7114, "encoder_q-layer.10": 842.2516, "encoder_q-layer.11": 1936.9081, "encoder_q-layer.2": 2134.5972, "encoder_q-layer.3": 2348.0054, "encoder_q-layer.4": 2510.0564, "encoder_q-layer.5": 2485.5254, "encoder_q-layer.6": 2243.3755, "encoder_q-layer.7": 2000.406, "encoder_q-layer.8": 1635.8077, "encoder_q-layer.9": 1088.5885, "epoch": 0.11, "inbatch_neg_score": 0.4751, "inbatch_pos_score": 1.0615, "learning_rate": 4.905555555555556e-05, "loss": 3.6398, "norm_diff": 0.0388, "norm_loss": 0.0, "num_token_doc": 67.0309, "num_token_overlap": 15.8698, "num_token_query": 42.3859, "num_token_union": 68.6329, "num_word_context": 202.6239, "num_word_doc": 50.0332, "num_word_query": 32.0307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3062.0875, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4712, "query_norm": 1.5908, "queue_k_norm": 1.5475, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3859, "sent_len_1": 67.0309, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1562, "stdk": 0.0477, "stdq": 0.0435, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.6724, "doc_norm": 1.5469, "encoder_q-embeddings": 1436.9471, "encoder_q-layer.0": 1010.4738, "encoder_q-layer.1": 1139.2515, "encoder_q-layer.10": 833.5334, "encoder_q-layer.11": 1830.4858, "encoder_q-layer.2": 1331.4316, "encoder_q-layer.3": 1476.1954, "encoder_q-layer.4": 1460.0925, "encoder_q-layer.5": 1542.9901, "encoder_q-layer.6": 1543.5342, "encoder_q-layer.7": 1343.4785, "encoder_q-layer.8": 1013.9068, "encoder_q-layer.9": 704.6387, "epoch": 0.12, "inbatch_neg_score": 0.4369, "inbatch_pos_score": 1.0713, "learning_rate": 4.9e-05, "loss": 3.6724, "norm_diff": 0.0378, "norm_loss": 0.0, "num_token_doc": 66.5172, "num_token_overlap": 15.7866, "num_token_query": 42.3808, "num_token_union": 68.3713, "num_word_context": 202.1649, "num_word_doc": 49.6183, "num_word_query": 31.9937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1940.8524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4348, "query_norm": 1.5848, "queue_k_norm": 1.5458, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3808, "sent_len_1": 66.5172, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.27, "stdk": 0.0476, "stdq": 0.0449, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.6569, "doc_norm": 1.5516, "encoder_q-embeddings": 3442.1602, "encoder_q-layer.0": 2470.1086, "encoder_q-layer.1": 2786.4912, "encoder_q-layer.10": 862.8804, "encoder_q-layer.11": 1934.1047, "encoder_q-layer.2": 3230.4321, "encoder_q-layer.3": 3425.0469, "encoder_q-layer.4": 3484.4602, "encoder_q-layer.5": 3466.9136, "encoder_q-layer.6": 3425.1316, "encoder_q-layer.7": 3085.5596, "encoder_q-layer.8": 1776.2981, "encoder_q-layer.9": 829.9691, "epoch": 0.12, "inbatch_neg_score": 0.4105, "inbatch_pos_score": 1.0273, "learning_rate": 4.894444444444445e-05, "loss": 3.6569, "norm_diff": 0.0177, "norm_loss": 0.0, "num_token_doc": 66.802, "num_token_overlap": 15.8256, "num_token_query": 42.3183, "num_token_union": 68.5119, "num_word_context": 202.3304, "num_word_doc": 49.8271, "num_word_query": 31.9557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4154.0666, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4087, "query_norm": 1.5399, "queue_k_norm": 1.545, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3183, "sent_len_1": 66.802, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7962, "stdk": 0.0479, "stdq": 0.0436, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.6577, "doc_norm": 1.5425, "encoder_q-embeddings": 1299.0144, "encoder_q-layer.0": 944.5989, "encoder_q-layer.1": 1004.9183, "encoder_q-layer.10": 888.3953, "encoder_q-layer.11": 2018.8348, "encoder_q-layer.2": 1096.8779, "encoder_q-layer.3": 1130.1226, "encoder_q-layer.4": 1209.8862, "encoder_q-layer.5": 1179.2802, "encoder_q-layer.6": 1224.2905, "encoder_q-layer.7": 1124.4141, "encoder_q-layer.8": 919.7259, "encoder_q-layer.9": 813.3926, "epoch": 0.12, "inbatch_neg_score": 0.3991, "inbatch_pos_score": 1.0088, "learning_rate": 4.888888888888889e-05, "loss": 3.6577, "norm_diff": 0.0234, "norm_loss": 0.0, "num_token_doc": 66.8596, "num_token_overlap": 15.8387, "num_token_query": 42.2519, "num_token_union": 68.4844, "num_word_context": 202.4239, "num_word_doc": 49.8563, "num_word_query": 31.9129, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1721.6695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3967, "query_norm": 1.5612, "queue_k_norm": 1.5434, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2519, "sent_len_1": 66.8596, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.9675, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.6665, "doc_norm": 1.5392, "encoder_q-embeddings": 1465.412, "encoder_q-layer.0": 1049.1898, "encoder_q-layer.1": 1152.6788, "encoder_q-layer.10": 436.1513, "encoder_q-layer.11": 1052.1656, "encoder_q-layer.2": 1409.9056, "encoder_q-layer.3": 1480.6521, "encoder_q-layer.4": 1616.6058, "encoder_q-layer.5": 1881.1952, "encoder_q-layer.6": 1930.6508, "encoder_q-layer.7": 2282.3491, "encoder_q-layer.8": 1867.4308, "encoder_q-layer.9": 658.8805, "epoch": 0.12, "inbatch_neg_score": 0.4017, "inbatch_pos_score": 1.0322, "learning_rate": 4.883333333333334e-05, "loss": 3.6665, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.7788, "num_token_overlap": 15.7992, "num_token_query": 42.2111, "num_token_union": 68.4679, "num_word_context": 202.1058, "num_word_doc": 49.8527, "num_word_query": 31.8819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2208.8694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4011, "query_norm": 1.5523, "queue_k_norm": 1.538, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2111, "sent_len_1": 66.7788, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7025, "stdk": 0.0478, "stdq": 0.0436, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.6972, "doc_norm": 1.53, "encoder_q-embeddings": 631.6288, "encoder_q-layer.0": 420.702, "encoder_q-layer.1": 471.2928, "encoder_q-layer.10": 445.9716, "encoder_q-layer.11": 1084.5012, "encoder_q-layer.2": 541.2243, "encoder_q-layer.3": 566.5023, "encoder_q-layer.4": 601.3539, "encoder_q-layer.5": 640.0628, "encoder_q-layer.6": 677.8863, "encoder_q-layer.7": 654.5743, "encoder_q-layer.8": 486.1737, "encoder_q-layer.9": 366.7348, "epoch": 0.12, "inbatch_neg_score": 0.4538, "inbatch_pos_score": 1.0547, "learning_rate": 4.8777777777777775e-05, "loss": 3.6972, "norm_diff": 0.0705, "norm_loss": 0.0, "num_token_doc": 66.7413, "num_token_overlap": 15.8767, "num_token_query": 42.3766, "num_token_union": 68.4176, "num_word_context": 202.1172, "num_word_doc": 49.7319, "num_word_query": 31.9982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 912.0146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4502, "query_norm": 1.6005, "queue_k_norm": 1.5304, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3766, "sent_len_1": 66.7413, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.045, "stdk": 0.0476, "stdq": 0.0436, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.6608, "doc_norm": 1.5292, "encoder_q-embeddings": 1375.7086, "encoder_q-layer.0": 975.8776, "encoder_q-layer.1": 1169.2958, "encoder_q-layer.10": 457.3888, "encoder_q-layer.11": 938.39, "encoder_q-layer.2": 1495.4397, "encoder_q-layer.3": 1626.347, "encoder_q-layer.4": 1731.4545, "encoder_q-layer.5": 1933.6465, "encoder_q-layer.6": 1676.8458, "encoder_q-layer.7": 1352.3812, "encoder_q-layer.8": 763.0175, "encoder_q-layer.9": 399.7399, "epoch": 0.12, "inbatch_neg_score": 0.3814, "inbatch_pos_score": 1.0107, "learning_rate": 4.8722222222222224e-05, "loss": 3.6608, "norm_diff": 0.0251, "norm_loss": 0.0, "num_token_doc": 66.8846, "num_token_overlap": 15.818, "num_token_query": 42.2918, "num_token_union": 68.5441, "num_word_context": 202.1992, "num_word_doc": 49.9217, "num_word_query": 31.9313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1907.4292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3792, "query_norm": 1.5543, "queue_k_norm": 1.5254, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2918, "sent_len_1": 66.8846, "sent_len_max_0": 128.0, "sent_len_max_1": 188.835, "stdk": 0.0478, "stdq": 0.044, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.6316, "doc_norm": 1.5209, "encoder_q-embeddings": 1130.7567, "encoder_q-layer.0": 756.1526, "encoder_q-layer.1": 922.9132, "encoder_q-layer.10": 232.3374, "encoder_q-layer.11": 500.3231, "encoder_q-layer.2": 1123.0367, "encoder_q-layer.3": 1260.5013, "encoder_q-layer.4": 1234.9645, "encoder_q-layer.5": 1192.1648, "encoder_q-layer.6": 943.7275, "encoder_q-layer.7": 742.9716, "encoder_q-layer.8": 350.3658, "encoder_q-layer.9": 220.4169, "epoch": 0.12, "inbatch_neg_score": 0.3781, "inbatch_pos_score": 0.9678, "learning_rate": 4.866666666666667e-05, "loss": 3.6316, "norm_diff": 0.0255, "norm_loss": 0.0, "num_token_doc": 66.8156, "num_token_overlap": 15.8696, "num_token_query": 42.5497, "num_token_union": 68.5901, "num_word_context": 202.4811, "num_word_doc": 49.885, "num_word_query": 32.1728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1348.9779, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.375, "query_norm": 1.5465, "queue_k_norm": 1.5209, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5497, "sent_len_1": 66.8156, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0525, "stdk": 0.0475, "stdq": 0.0437, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.6648, "doc_norm": 1.5132, "encoder_q-embeddings": 1003.1212, "encoder_q-layer.0": 724.4605, "encoder_q-layer.1": 772.7615, "encoder_q-layer.10": 211.5752, "encoder_q-layer.11": 479.9567, "encoder_q-layer.2": 862.4065, "encoder_q-layer.3": 953.6496, "encoder_q-layer.4": 950.9493, "encoder_q-layer.5": 886.1646, "encoder_q-layer.6": 898.3486, "encoder_q-layer.7": 756.0886, "encoder_q-layer.8": 697.0749, "encoder_q-layer.9": 398.83, "epoch": 0.12, "inbatch_neg_score": 0.3735, "inbatch_pos_score": 0.9888, "learning_rate": 4.8611111111111115e-05, "loss": 3.6648, "norm_diff": 0.0183, "norm_loss": 0.0, "num_token_doc": 66.7383, "num_token_overlap": 15.7418, "num_token_query": 42.1775, "num_token_union": 68.4401, "num_word_context": 202.4892, "num_word_doc": 49.7917, "num_word_query": 31.8643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1171.818, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3718, "query_norm": 1.5315, "queue_k_norm": 1.5181, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1775, "sent_len_1": 66.7383, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.2887, "stdk": 0.0475, "stdq": 0.0435, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.6448, "doc_norm": 1.5122, "encoder_q-embeddings": 532.6437, "encoder_q-layer.0": 377.2572, "encoder_q-layer.1": 430.7506, "encoder_q-layer.10": 207.504, "encoder_q-layer.11": 509.7961, "encoder_q-layer.2": 510.0453, "encoder_q-layer.3": 518.2939, "encoder_q-layer.4": 424.5955, "encoder_q-layer.5": 435.7979, "encoder_q-layer.6": 389.7979, "encoder_q-layer.7": 344.5247, "encoder_q-layer.8": 272.9863, "encoder_q-layer.9": 180.5206, "epoch": 0.12, "inbatch_neg_score": 0.3956, "inbatch_pos_score": 1.0107, "learning_rate": 4.855555555555556e-05, "loss": 3.6448, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.7905, "num_token_overlap": 15.7944, "num_token_query": 42.2288, "num_token_union": 68.4347, "num_word_context": 201.9479, "num_word_doc": 49.7837, "num_word_query": 31.884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 619.1962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3933, "query_norm": 1.5292, "queue_k_norm": 1.5079, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2288, "sent_len_1": 66.7905, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.7775, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.6443, "doc_norm": 1.4968, "encoder_q-embeddings": 2705.7686, "encoder_q-layer.0": 1930.3715, "encoder_q-layer.1": 2452.2122, "encoder_q-layer.10": 216.5671, "encoder_q-layer.11": 529.2564, "encoder_q-layer.2": 2606.7297, "encoder_q-layer.3": 2855.6453, "encoder_q-layer.4": 2908.4597, "encoder_q-layer.5": 2833.9175, "encoder_q-layer.6": 2528.7996, "encoder_q-layer.7": 1454.4542, "encoder_q-layer.8": 522.585, "encoder_q-layer.9": 222.829, "epoch": 0.12, "inbatch_neg_score": 0.4363, "inbatch_pos_score": 1.0508, "learning_rate": 4.85e-05, "loss": 3.6443, "norm_diff": 0.0933, "norm_loss": 0.0, "num_token_doc": 66.9658, "num_token_overlap": 15.8436, "num_token_query": 42.2912, "num_token_union": 68.5713, "num_word_context": 202.4841, "num_word_doc": 49.9711, "num_word_query": 31.9519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3145.2126, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4365, "query_norm": 1.5901, "queue_k_norm": 1.5014, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2912, "sent_len_1": 66.9658, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6163, "stdk": 0.0472, "stdq": 0.0441, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.6264, "doc_norm": 1.4951, "encoder_q-embeddings": 3851.4961, "encoder_q-layer.0": 2709.7029, "encoder_q-layer.1": 3135.4607, "encoder_q-layer.10": 234.9011, "encoder_q-layer.11": 549.0968, "encoder_q-layer.2": 3867.4929, "encoder_q-layer.3": 3414.7969, "encoder_q-layer.4": 3513.3416, "encoder_q-layer.5": 3172.2498, "encoder_q-layer.6": 2501.4863, "encoder_q-layer.7": 2328.9629, "encoder_q-layer.8": 1819.7277, "encoder_q-layer.9": 409.7407, "epoch": 0.12, "inbatch_neg_score": 0.4188, "inbatch_pos_score": 1.0381, "learning_rate": 4.844444444444445e-05, "loss": 3.6264, "norm_diff": 0.0599, "norm_loss": 0.0, "num_token_doc": 66.8188, "num_token_overlap": 15.8945, "num_token_query": 42.5519, "num_token_union": 68.6068, "num_word_context": 202.3353, "num_word_doc": 49.8621, "num_word_query": 32.1658, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4129.0105, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4167, "query_norm": 1.5549, "queue_k_norm": 1.4978, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5519, "sent_len_1": 66.8188, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1775, "stdk": 0.0473, "stdq": 0.0437, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.6482, "doc_norm": 1.4886, "encoder_q-embeddings": 1938.9053, "encoder_q-layer.0": 1390.2095, "encoder_q-layer.1": 1728.9862, "encoder_q-layer.10": 245.7248, "encoder_q-layer.11": 524.0852, "encoder_q-layer.2": 1885.3988, "encoder_q-layer.3": 1881.3833, "encoder_q-layer.4": 1904.1483, "encoder_q-layer.5": 1729.9241, "encoder_q-layer.6": 1987.8984, "encoder_q-layer.7": 1656.7764, "encoder_q-layer.8": 1187.1477, "encoder_q-layer.9": 325.5423, "epoch": 0.13, "inbatch_neg_score": 0.3931, "inbatch_pos_score": 0.9829, "learning_rate": 4.838888888888889e-05, "loss": 3.6482, "norm_diff": 0.0243, "norm_loss": 0.0, "num_token_doc": 66.8018, "num_token_overlap": 15.8221, "num_token_query": 42.3612, "num_token_union": 68.4584, "num_word_context": 202.1872, "num_word_doc": 49.8328, "num_word_query": 31.9743, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2324.1393, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3914, "query_norm": 1.5129, "queue_k_norm": 1.491, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3612, "sent_len_1": 66.8018, "sent_len_max_0": 127.99, "sent_len_max_1": 188.82, "stdk": 0.0471, "stdq": 0.0424, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.6403, "doc_norm": 1.486, "encoder_q-embeddings": 1209.7153, "encoder_q-layer.0": 862.5216, "encoder_q-layer.1": 1033.46, "encoder_q-layer.10": 220.8537, "encoder_q-layer.11": 518.7521, "encoder_q-layer.2": 1175.7526, "encoder_q-layer.3": 1120.9059, "encoder_q-layer.4": 966.0975, "encoder_q-layer.5": 744.8325, "encoder_q-layer.6": 674.8444, "encoder_q-layer.7": 464.5942, "encoder_q-layer.8": 303.3642, "encoder_q-layer.9": 199.4258, "epoch": 0.13, "inbatch_neg_score": 0.376, "inbatch_pos_score": 0.9526, "learning_rate": 4.8333333333333334e-05, "loss": 3.6403, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.7952, "num_token_overlap": 15.7788, "num_token_query": 42.2323, "num_token_union": 68.4685, "num_word_context": 202.0279, "num_word_doc": 49.8136, "num_word_query": 31.8658, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1250.663, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3748, "query_norm": 1.4961, "queue_k_norm": 1.4854, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2323, "sent_len_1": 66.7952, "sent_len_max_0": 127.985, "sent_len_max_1": 190.7887, "stdk": 0.0472, "stdq": 0.0425, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.652, "doc_norm": 1.4822, "encoder_q-embeddings": 882.303, "encoder_q-layer.0": 693.7819, "encoder_q-layer.1": 757.4382, "encoder_q-layer.10": 261.864, "encoder_q-layer.11": 536.7642, "encoder_q-layer.2": 800.397, "encoder_q-layer.3": 860.0333, "encoder_q-layer.4": 824.754, "encoder_q-layer.5": 803.7974, "encoder_q-layer.6": 905.0032, "encoder_q-layer.7": 766.7606, "encoder_q-layer.8": 439.1539, "encoder_q-layer.9": 237.4521, "epoch": 0.13, "inbatch_neg_score": 0.3552, "inbatch_pos_score": 0.9722, "learning_rate": 4.8277777777777776e-05, "loss": 3.652, "norm_diff": 0.0865, "norm_loss": 0.0, "num_token_doc": 66.8064, "num_token_overlap": 15.829, "num_token_query": 42.3079, "num_token_union": 68.4358, "num_word_context": 202.4363, "num_word_doc": 49.8694, "num_word_query": 31.9377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1072.5937, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3521, "query_norm": 1.5687, "queue_k_norm": 1.4846, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3079, "sent_len_1": 66.8064, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0213, "stdk": 0.0472, "stdq": 0.0458, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.6447, "doc_norm": 1.4812, "encoder_q-embeddings": 3560.863, "encoder_q-layer.0": 2811.9248, "encoder_q-layer.1": 3001.293, "encoder_q-layer.10": 205.8442, "encoder_q-layer.11": 484.5663, "encoder_q-layer.2": 3512.7908, "encoder_q-layer.3": 3502.4893, "encoder_q-layer.4": 3676.5264, "encoder_q-layer.5": 3694.5745, "encoder_q-layer.6": 3064.7517, "encoder_q-layer.7": 3211.0278, "encoder_q-layer.8": 1479.9967, "encoder_q-layer.9": 292.8743, "epoch": 0.13, "inbatch_neg_score": 0.3968, "inbatch_pos_score": 1.0195, "learning_rate": 4.8222222222222225e-05, "loss": 3.6447, "norm_diff": 0.069, "norm_loss": 0.0, "num_token_doc": 66.7819, "num_token_overlap": 15.8085, "num_token_query": 42.2555, "num_token_union": 68.4874, "num_word_context": 202.3962, "num_word_doc": 49.8766, "num_word_query": 31.9403, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4310.9613, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3967, "query_norm": 1.5502, "queue_k_norm": 1.4808, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2555, "sent_len_1": 66.7819, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5475, "stdk": 0.0473, "stdq": 0.0448, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.6412, "doc_norm": 1.4794, "encoder_q-embeddings": 1352.838, "encoder_q-layer.0": 955.795, "encoder_q-layer.1": 1021.2394, "encoder_q-layer.10": 259.5074, "encoder_q-layer.11": 622.4047, "encoder_q-layer.2": 1231.7211, "encoder_q-layer.3": 1298.3669, "encoder_q-layer.4": 1338.4202, "encoder_q-layer.5": 1284.2803, "encoder_q-layer.6": 1334.1956, "encoder_q-layer.7": 1224.2709, "encoder_q-layer.8": 772.9865, "encoder_q-layer.9": 269.2518, "epoch": 0.13, "inbatch_neg_score": 0.4037, "inbatch_pos_score": 1.0234, "learning_rate": 4.8166666666666674e-05, "loss": 3.6412, "norm_diff": 0.088, "norm_loss": 0.0, "num_token_doc": 66.7718, "num_token_overlap": 15.7598, "num_token_query": 42.2345, "num_token_union": 68.4466, "num_word_context": 202.273, "num_word_doc": 49.8287, "num_word_query": 31.8875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1588.7228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4021, "query_norm": 1.5674, "queue_k_norm": 1.4765, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2345, "sent_len_1": 66.7718, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1725, "stdk": 0.0474, "stdq": 0.0454, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.6758, "doc_norm": 1.4705, "encoder_q-embeddings": 2140.354, "encoder_q-layer.0": 1679.5165, "encoder_q-layer.1": 1971.725, "encoder_q-layer.10": 236.3609, "encoder_q-layer.11": 554.4525, "encoder_q-layer.2": 2208.1348, "encoder_q-layer.3": 2243.3516, "encoder_q-layer.4": 1690.8481, "encoder_q-layer.5": 834.4469, "encoder_q-layer.6": 712.267, "encoder_q-layer.7": 602.6863, "encoder_q-layer.8": 345.1406, "encoder_q-layer.9": 201.8653, "epoch": 0.13, "inbatch_neg_score": 0.3858, "inbatch_pos_score": 1.0107, "learning_rate": 4.811111111111111e-05, "loss": 3.6758, "norm_diff": 0.0688, "norm_loss": 0.0, "num_token_doc": 66.6717, "num_token_overlap": 15.8936, "num_token_query": 42.3623, "num_token_union": 68.4018, "num_word_context": 202.3065, "num_word_doc": 49.7874, "num_word_query": 31.9853, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2199.0361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3833, "query_norm": 1.5393, "queue_k_norm": 1.4744, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3623, "sent_len_1": 66.6717, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6788, "stdk": 0.0471, "stdq": 0.0447, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.6275, "doc_norm": 1.4703, "encoder_q-embeddings": 533.6014, "encoder_q-layer.0": 371.0254, "encoder_q-layer.1": 445.3695, "encoder_q-layer.10": 197.4287, "encoder_q-layer.11": 460.1283, "encoder_q-layer.2": 555.1744, "encoder_q-layer.3": 727.3921, "encoder_q-layer.4": 522.7747, "encoder_q-layer.5": 341.6047, "encoder_q-layer.6": 363.1355, "encoder_q-layer.7": 273.4984, "encoder_q-layer.8": 230.3652, "encoder_q-layer.9": 182.3353, "epoch": 0.13, "inbatch_neg_score": 0.3655, "inbatch_pos_score": 0.9775, "learning_rate": 4.805555555555556e-05, "loss": 3.6275, "norm_diff": 0.0615, "norm_loss": 0.0, "num_token_doc": 66.841, "num_token_overlap": 15.8262, "num_token_query": 42.2731, "num_token_union": 68.4151, "num_word_context": 202.0151, "num_word_doc": 49.8673, "num_word_query": 31.9083, "postclip_grad_norm": 1.0, "preclip_grad_norm": 637.5631, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3628, "query_norm": 1.5318, "queue_k_norm": 1.4705, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2731, "sent_len_1": 66.841, "sent_len_max_0": 127.995, "sent_len_max_1": 191.21, "stdk": 0.0472, "stdq": 0.0438, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.6105, "doc_norm": 1.4745, "encoder_q-embeddings": 1838.2029, "encoder_q-layer.0": 1294.1565, "encoder_q-layer.1": 1576.6207, "encoder_q-layer.10": 205.8559, "encoder_q-layer.11": 457.0773, "encoder_q-layer.2": 1515.3829, "encoder_q-layer.3": 1554.1625, "encoder_q-layer.4": 1485.1328, "encoder_q-layer.5": 1259.6385, "encoder_q-layer.6": 1047.7278, "encoder_q-layer.7": 974.091, "encoder_q-layer.8": 743.863, "encoder_q-layer.9": 251.8223, "epoch": 0.13, "inbatch_neg_score": 0.3542, "inbatch_pos_score": 1.0205, "learning_rate": 4.8e-05, "loss": 3.6105, "norm_diff": 0.0623, "norm_loss": 0.0, "num_token_doc": 66.866, "num_token_overlap": 15.8944, "num_token_query": 42.5999, "num_token_union": 68.5967, "num_word_context": 202.6684, "num_word_doc": 49.8894, "num_word_query": 32.2037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1870.0179, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3533, "query_norm": 1.5368, "queue_k_norm": 1.4696, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5999, "sent_len_1": 66.866, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.495, "stdk": 0.0474, "stdq": 0.0449, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.6347, "doc_norm": 1.4627, "encoder_q-embeddings": 1891.4081, "encoder_q-layer.0": 1535.6001, "encoder_q-layer.1": 1723.8555, "encoder_q-layer.10": 226.1873, "encoder_q-layer.11": 544.061, "encoder_q-layer.2": 1904.8138, "encoder_q-layer.3": 1880.5657, "encoder_q-layer.4": 1665.5842, "encoder_q-layer.5": 1542.251, "encoder_q-layer.6": 1650.217, "encoder_q-layer.7": 1315.2288, "encoder_q-layer.8": 783.8667, "encoder_q-layer.9": 279.4107, "epoch": 0.13, "inbatch_neg_score": 0.3732, "inbatch_pos_score": 0.9805, "learning_rate": 4.794444444444445e-05, "loss": 3.6347, "norm_diff": 0.0666, "norm_loss": 0.0, "num_token_doc": 66.6751, "num_token_overlap": 15.7893, "num_token_query": 42.3404, "num_token_union": 68.4636, "num_word_context": 202.2567, "num_word_doc": 49.7459, "num_word_query": 32.0017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2170.4628, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3699, "query_norm": 1.5292, "queue_k_norm": 1.4658, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3404, "sent_len_1": 66.6751, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2875, "stdk": 0.0471, "stdq": 0.0443, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.6306, "doc_norm": 1.4622, "encoder_q-embeddings": 1380.8768, "encoder_q-layer.0": 954.4918, "encoder_q-layer.1": 1048.7426, "encoder_q-layer.10": 238.9579, "encoder_q-layer.11": 650.5419, "encoder_q-layer.2": 1198.4504, "encoder_q-layer.3": 1273.3082, "encoder_q-layer.4": 1307.8505, "encoder_q-layer.5": 1133.8387, "encoder_q-layer.6": 1283.4324, "encoder_q-layer.7": 993.6183, "encoder_q-layer.8": 457.9189, "encoder_q-layer.9": 212.7559, "epoch": 0.13, "inbatch_neg_score": 0.3806, "inbatch_pos_score": 0.9458, "learning_rate": 4.7888888888888886e-05, "loss": 3.6306, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.7857, "num_token_overlap": 15.86, "num_token_query": 42.3077, "num_token_union": 68.507, "num_word_context": 202.2335, "num_word_doc": 49.8575, "num_word_query": 31.9651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1533.733, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3777, "query_norm": 1.4715, "queue_k_norm": 1.4637, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3077, "sent_len_1": 66.7857, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.4187, "stdk": 0.0471, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.6524, "doc_norm": 1.4636, "encoder_q-embeddings": 1438.3424, "encoder_q-layer.0": 1020.9783, "encoder_q-layer.1": 1249.015, "encoder_q-layer.10": 228.387, "encoder_q-layer.11": 537.8481, "encoder_q-layer.2": 1339.244, "encoder_q-layer.3": 1389.5332, "encoder_q-layer.4": 1305.9465, "encoder_q-layer.5": 1205.4268, "encoder_q-layer.6": 1186.4507, "encoder_q-layer.7": 997.9376, "encoder_q-layer.8": 553.6725, "encoder_q-layer.9": 216.812, "epoch": 0.14, "inbatch_neg_score": 0.3984, "inbatch_pos_score": 0.9995, "learning_rate": 4.7833333333333335e-05, "loss": 3.6524, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.7388, "num_token_overlap": 15.8572, "num_token_query": 42.4226, "num_token_union": 68.4283, "num_word_context": 202.2343, "num_word_doc": 49.7586, "num_word_query": 32.0477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1600.8822, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3962, "query_norm": 1.5011, "queue_k_norm": 1.4592, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4226, "sent_len_1": 66.7388, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7825, "stdk": 0.0473, "stdq": 0.0434, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.6735, "doc_norm": 1.4605, "encoder_q-embeddings": 981.5521, "encoder_q-layer.0": 749.4828, "encoder_q-layer.1": 776.5739, "encoder_q-layer.10": 228.7417, "encoder_q-layer.11": 541.694, "encoder_q-layer.2": 882.9267, "encoder_q-layer.3": 974.4882, "encoder_q-layer.4": 983.3792, "encoder_q-layer.5": 966.1269, "encoder_q-layer.6": 1023.3126, "encoder_q-layer.7": 858.9694, "encoder_q-layer.8": 467.1602, "encoder_q-layer.9": 209.7762, "epoch": 0.14, "inbatch_neg_score": 0.3643, "inbatch_pos_score": 0.9741, "learning_rate": 4.7777777777777784e-05, "loss": 3.6735, "norm_diff": 0.0416, "norm_loss": 0.0, "num_token_doc": 66.7179, "num_token_overlap": 15.7753, "num_token_query": 42.2684, "num_token_union": 68.4794, "num_word_context": 202.3438, "num_word_doc": 49.7848, "num_word_query": 31.9311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1188.4795, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.364, "query_norm": 1.5021, "queue_k_norm": 1.4556, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2684, "sent_len_1": 66.7179, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3625, "stdk": 0.0473, "stdq": 0.0439, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.6348, "doc_norm": 1.4524, "encoder_q-embeddings": 927.4137, "encoder_q-layer.0": 721.0223, "encoder_q-layer.1": 759.0781, "encoder_q-layer.10": 244.8837, "encoder_q-layer.11": 546.8771, "encoder_q-layer.2": 875.3581, "encoder_q-layer.3": 968.2504, "encoder_q-layer.4": 956.823, "encoder_q-layer.5": 928.8173, "encoder_q-layer.6": 973.463, "encoder_q-layer.7": 853.6871, "encoder_q-layer.8": 568.5091, "encoder_q-layer.9": 249.1922, "epoch": 0.14, "inbatch_neg_score": 0.3409, "inbatch_pos_score": 0.96, "learning_rate": 4.7722222222222226e-05, "loss": 3.6348, "norm_diff": 0.1482, "norm_loss": 0.0, "num_token_doc": 66.7125, "num_token_overlap": 15.82, "num_token_query": 42.2391, "num_token_union": 68.3812, "num_word_context": 201.9911, "num_word_doc": 49.7656, "num_word_query": 31.9133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1142.0303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3403, "query_norm": 1.6007, "queue_k_norm": 1.4538, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2391, "sent_len_1": 66.7125, "sent_len_max_0": 127.985, "sent_len_max_1": 190.2862, "stdk": 0.047, "stdq": 0.0473, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6129, "doc_norm": 1.4454, "encoder_q-embeddings": 883.2714, "encoder_q-layer.0": 627.7247, "encoder_q-layer.1": 702.774, "encoder_q-layer.10": 226.1589, "encoder_q-layer.11": 514.7032, "encoder_q-layer.2": 792.9099, "encoder_q-layer.3": 872.9753, "encoder_q-layer.4": 942.4001, "encoder_q-layer.5": 881.9105, "encoder_q-layer.6": 846.4159, "encoder_q-layer.7": 647.4636, "encoder_q-layer.8": 454.7365, "encoder_q-layer.9": 186.7476, "epoch": 0.14, "inbatch_neg_score": 0.3607, "inbatch_pos_score": 0.9683, "learning_rate": 4.766666666666667e-05, "loss": 3.6129, "norm_diff": 0.0845, "norm_loss": 0.0, "num_token_doc": 66.6324, "num_token_overlap": 15.829, "num_token_query": 42.3555, "num_token_union": 68.4277, "num_word_context": 201.8858, "num_word_doc": 49.6896, "num_word_query": 31.9628, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1044.674, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3594, "query_norm": 1.5299, "queue_k_norm": 1.4512, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3555, "sent_len_1": 66.6324, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.3137, "stdk": 0.0468, "stdq": 0.0444, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.6134, "doc_norm": 1.444, "encoder_q-embeddings": 1618.3792, "encoder_q-layer.0": 1080.6963, "encoder_q-layer.1": 1261.4647, "encoder_q-layer.10": 225.9965, "encoder_q-layer.11": 498.4088, "encoder_q-layer.2": 1512.2975, "encoder_q-layer.3": 1319.9061, "encoder_q-layer.4": 1234.8752, "encoder_q-layer.5": 1090.1576, "encoder_q-layer.6": 950.032, "encoder_q-layer.7": 772.7009, "encoder_q-layer.8": 472.2374, "encoder_q-layer.9": 206.4091, "epoch": 0.14, "inbatch_neg_score": 0.3341, "inbatch_pos_score": 0.9438, "learning_rate": 4.761111111111111e-05, "loss": 3.6134, "norm_diff": 0.1323, "norm_loss": 0.0, "num_token_doc": 66.7913, "num_token_overlap": 15.796, "num_token_query": 42.329, "num_token_union": 68.5002, "num_word_context": 202.4956, "num_word_doc": 49.8699, "num_word_query": 31.9891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1579.2037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3323, "query_norm": 1.5763, "queue_k_norm": 1.4476, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.329, "sent_len_1": 66.7913, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0662, "stdk": 0.0468, "stdq": 0.0454, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.6402, "doc_norm": 1.4439, "encoder_q-embeddings": 3845.5432, "encoder_q-layer.0": 2432.4502, "encoder_q-layer.1": 2886.0894, "encoder_q-layer.10": 394.5276, "encoder_q-layer.11": 1005.7029, "encoder_q-layer.2": 3640.562, "encoder_q-layer.3": 3993.0806, "encoder_q-layer.4": 4507.3179, "encoder_q-layer.5": 3674.5781, "encoder_q-layer.6": 4306.6313, "encoder_q-layer.7": 2563.3052, "encoder_q-layer.8": 1220.2446, "encoder_q-layer.9": 464.3118, "epoch": 0.14, "inbatch_neg_score": 0.3381, "inbatch_pos_score": 0.9233, "learning_rate": 4.755555555555556e-05, "loss": 3.6402, "norm_diff": 0.0501, "norm_loss": 0.0, "num_token_doc": 67.0175, "num_token_overlap": 15.8578, "num_token_query": 42.1254, "num_token_union": 68.4763, "num_word_context": 202.5397, "num_word_doc": 50.0047, "num_word_query": 31.8155, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4518.3068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.335, "query_norm": 1.4941, "queue_k_norm": 1.4458, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1254, "sent_len_1": 67.0175, "sent_len_max_0": 128.0, "sent_len_max_1": 189.545, "stdk": 0.0468, "stdq": 0.0426, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.6145, "doc_norm": 1.4408, "encoder_q-embeddings": 17328.9785, "encoder_q-layer.0": 13362.8916, "encoder_q-layer.1": 13169.5059, "encoder_q-layer.10": 495.2995, "encoder_q-layer.11": 1082.0934, "encoder_q-layer.2": 14782.1885, "encoder_q-layer.3": 16607.2461, "encoder_q-layer.4": 16763.2949, "encoder_q-layer.5": 14677.7881, "encoder_q-layer.6": 13223.7197, "encoder_q-layer.7": 13965.0771, "encoder_q-layer.8": 8522.6016, "encoder_q-layer.9": 2597.6514, "epoch": 0.14, "inbatch_neg_score": 0.3481, "inbatch_pos_score": 0.9365, "learning_rate": 4.75e-05, "loss": 3.6145, "norm_diff": 0.0789, "norm_loss": 0.0, "num_token_doc": 66.889, "num_token_overlap": 15.7692, "num_token_query": 42.3133, "num_token_union": 68.567, "num_word_context": 202.4345, "num_word_doc": 49.8873, "num_word_query": 31.9561, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19394.3387, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.3462, "query_norm": 1.5196, "queue_k_norm": 1.4428, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3133, "sent_len_1": 66.889, "sent_len_max_0": 127.9862, "sent_len_max_1": 189.8237, "stdk": 0.0468, "stdq": 0.0433, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.6073, "doc_norm": 1.4384, "encoder_q-embeddings": 1348.5927, "encoder_q-layer.0": 1002.6306, "encoder_q-layer.1": 1168.087, "encoder_q-layer.10": 208.7794, "encoder_q-layer.11": 491.8707, "encoder_q-layer.2": 1334.0894, "encoder_q-layer.3": 1364.458, "encoder_q-layer.4": 1384.0255, "encoder_q-layer.5": 1278.6073, "encoder_q-layer.6": 1190.0389, "encoder_q-layer.7": 884.312, "encoder_q-layer.8": 557.0098, "encoder_q-layer.9": 252.0257, "epoch": 0.14, "inbatch_neg_score": 0.3698, "inbatch_pos_score": 0.9961, "learning_rate": 4.7444444444444445e-05, "loss": 3.6073, "norm_diff": 0.1425, "norm_loss": 0.0, "num_token_doc": 66.7327, "num_token_overlap": 15.8053, "num_token_query": 42.3896, "num_token_union": 68.5216, "num_word_context": 202.4194, "num_word_doc": 49.7876, "num_word_query": 32.0186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1573.6652, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3687, "query_norm": 1.581, "queue_k_norm": 1.4387, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3896, "sent_len_1": 66.7327, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2488, "stdk": 0.0468, "stdq": 0.0451, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.6145, "doc_norm": 1.4433, "encoder_q-embeddings": 1095.1249, "encoder_q-layer.0": 862.584, "encoder_q-layer.1": 888.6069, "encoder_q-layer.10": 226.7588, "encoder_q-layer.11": 558.9371, "encoder_q-layer.2": 884.757, "encoder_q-layer.3": 824.0624, "encoder_q-layer.4": 705.6808, "encoder_q-layer.5": 701.3045, "encoder_q-layer.6": 736.1487, "encoder_q-layer.7": 679.2436, "encoder_q-layer.8": 446.4012, "encoder_q-layer.9": 235.7838, "epoch": 0.14, "inbatch_neg_score": 0.3959, "inbatch_pos_score": 1.0176, "learning_rate": 4.7388888888888894e-05, "loss": 3.6145, "norm_diff": 0.0954, "norm_loss": 0.0, "num_token_doc": 66.8695, "num_token_overlap": 15.895, "num_token_query": 42.4137, "num_token_union": 68.5335, "num_word_context": 202.2604, "num_word_doc": 49.8793, "num_word_query": 32.0506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1116.8742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3955, "query_norm": 1.5387, "queue_k_norm": 1.4412, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4137, "sent_len_1": 66.8695, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8837, "stdk": 0.047, "stdq": 0.0443, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.6273, "doc_norm": 1.4404, "encoder_q-embeddings": 1503.5417, "encoder_q-layer.0": 1066.8594, "encoder_q-layer.1": 1312.6689, "encoder_q-layer.10": 200.3425, "encoder_q-layer.11": 481.09, "encoder_q-layer.2": 1462.2158, "encoder_q-layer.3": 1497.9567, "encoder_q-layer.4": 1502.5774, "encoder_q-layer.5": 1380.1433, "encoder_q-layer.6": 1363.7524, "encoder_q-layer.7": 1094.8796, "encoder_q-layer.8": 620.7773, "encoder_q-layer.9": 238.8181, "epoch": 0.14, "inbatch_neg_score": 0.4008, "inbatch_pos_score": 1.0244, "learning_rate": 4.7333333333333336e-05, "loss": 3.6273, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.8123, "num_token_overlap": 15.918, "num_token_query": 42.5021, "num_token_union": 68.4741, "num_word_context": 202.2033, "num_word_doc": 49.8585, "num_word_query": 32.1327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1746.3661, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3987, "query_norm": 1.5444, "queue_k_norm": 1.4408, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5021, "sent_len_1": 66.8123, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9712, "stdk": 0.0468, "stdq": 0.0445, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.6314, "doc_norm": 1.4435, "encoder_q-embeddings": 612.7099, "encoder_q-layer.0": 439.1038, "encoder_q-layer.1": 451.1738, "encoder_q-layer.10": 221.1584, "encoder_q-layer.11": 503.7322, "encoder_q-layer.2": 488.432, "encoder_q-layer.3": 536.3431, "encoder_q-layer.4": 587.9492, "encoder_q-layer.5": 535.9677, "encoder_q-layer.6": 528.1688, "encoder_q-layer.7": 402.9674, "encoder_q-layer.8": 342.2929, "encoder_q-layer.9": 209.2661, "epoch": 0.15, "inbatch_neg_score": 0.4069, "inbatch_pos_score": 1.0244, "learning_rate": 4.727777777777778e-05, "loss": 3.6314, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 66.5985, "num_token_overlap": 15.8123, "num_token_query": 42.3363, "num_token_union": 68.3638, "num_word_context": 201.9584, "num_word_doc": 49.6659, "num_word_query": 32.0013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 699.9033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4062, "query_norm": 1.5301, "queue_k_norm": 1.4382, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3363, "sent_len_1": 66.5985, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.605, "stdk": 0.0469, "stdq": 0.0434, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.647, "doc_norm": 1.4424, "encoder_q-embeddings": 670.0572, "encoder_q-layer.0": 502.5867, "encoder_q-layer.1": 550.7589, "encoder_q-layer.10": 257.5258, "encoder_q-layer.11": 662.0413, "encoder_q-layer.2": 606.9729, "encoder_q-layer.3": 631.442, "encoder_q-layer.4": 659.8782, "encoder_q-layer.5": 650.7775, "encoder_q-layer.6": 707.5585, "encoder_q-layer.7": 656.0745, "encoder_q-layer.8": 642.4582, "encoder_q-layer.9": 392.7081, "epoch": 0.15, "inbatch_neg_score": 0.3815, "inbatch_pos_score": 0.998, "learning_rate": 4.722222222222222e-05, "loss": 3.647, "norm_diff": 0.1744, "norm_loss": 0.0, "num_token_doc": 66.6061, "num_token_overlap": 15.8118, "num_token_query": 42.32, "num_token_union": 68.3967, "num_word_context": 201.8475, "num_word_doc": 49.7101, "num_word_query": 31.9721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 915.892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3804, "query_norm": 1.6168, "queue_k_norm": 1.4411, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.32, "sent_len_1": 66.6061, "sent_len_max_0": 128.0, "sent_len_max_1": 187.955, "stdk": 0.0468, "stdq": 0.0464, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.6605, "doc_norm": 1.4458, "encoder_q-embeddings": 2080.2258, "encoder_q-layer.0": 1511.5367, "encoder_q-layer.1": 1590.7085, "encoder_q-layer.10": 208.2964, "encoder_q-layer.11": 495.269, "encoder_q-layer.2": 1819.7743, "encoder_q-layer.3": 1782.6702, "encoder_q-layer.4": 1503.6056, "encoder_q-layer.5": 1138.5911, "encoder_q-layer.6": 651.0869, "encoder_q-layer.7": 462.3205, "encoder_q-layer.8": 338.7448, "encoder_q-layer.9": 215.6069, "epoch": 0.15, "inbatch_neg_score": 0.4051, "inbatch_pos_score": 1.0137, "learning_rate": 4.716666666666667e-05, "loss": 3.6605, "norm_diff": 0.0834, "norm_loss": 0.0, "num_token_doc": 66.9583, "num_token_overlap": 15.8289, "num_token_query": 42.3192, "num_token_union": 68.5819, "num_word_context": 202.4596, "num_word_doc": 49.9557, "num_word_query": 31.9661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1926.6013, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.405, "query_norm": 1.5291, "queue_k_norm": 1.4422, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3192, "sent_len_1": 66.9583, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.5712, "stdk": 0.0469, "stdq": 0.0446, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.6399, "doc_norm": 1.4404, "encoder_q-embeddings": 542.6953, "encoder_q-layer.0": 382.4556, "encoder_q-layer.1": 425.3217, "encoder_q-layer.10": 211.8312, "encoder_q-layer.11": 453.4993, "encoder_q-layer.2": 459.6691, "encoder_q-layer.3": 459.1848, "encoder_q-layer.4": 442.5325, "encoder_q-layer.5": 387.7228, "encoder_q-layer.6": 413.9653, "encoder_q-layer.7": 384.6158, "encoder_q-layer.8": 306.7015, "encoder_q-layer.9": 229.5069, "epoch": 0.15, "inbatch_neg_score": 0.389, "inbatch_pos_score": 0.9932, "learning_rate": 4.711111111111111e-05, "loss": 3.6399, "norm_diff": 0.1145, "norm_loss": 0.0, "num_token_doc": 66.8167, "num_token_overlap": 15.7412, "num_token_query": 42.1229, "num_token_union": 68.4706, "num_word_context": 202.2352, "num_word_doc": 49.8472, "num_word_query": 31.8197, "postclip_grad_norm": 1.0, "preclip_grad_norm": 608.0447, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3894, "query_norm": 1.5549, "queue_k_norm": 1.4424, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1229, "sent_len_1": 66.8167, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1438, "stdk": 0.0466, "stdq": 0.0448, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.6641, "doc_norm": 1.4368, "encoder_q-embeddings": 1123.8959, "encoder_q-layer.0": 828.4427, "encoder_q-layer.1": 923.4548, "encoder_q-layer.10": 212.6833, "encoder_q-layer.11": 498.8544, "encoder_q-layer.2": 1015.2859, "encoder_q-layer.3": 1054.2474, "encoder_q-layer.4": 1091.2915, "encoder_q-layer.5": 1116.4994, "encoder_q-layer.6": 1205.3749, "encoder_q-layer.7": 1177.7827, "encoder_q-layer.8": 778.537, "encoder_q-layer.9": 285.5925, "epoch": 0.15, "inbatch_neg_score": 0.3864, "inbatch_pos_score": 0.9683, "learning_rate": 4.7055555555555555e-05, "loss": 3.6641, "norm_diff": 0.1663, "norm_loss": 0.0, "num_token_doc": 66.826, "num_token_overlap": 15.7944, "num_token_query": 42.08, "num_token_union": 68.384, "num_word_context": 202.0481, "num_word_doc": 49.8196, "num_word_query": 31.758, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1388.6652, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3831, "query_norm": 1.6031, "queue_k_norm": 1.4416, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.08, "sent_len_1": 66.826, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1612, "stdk": 0.0465, "stdq": 0.044, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.6507, "doc_norm": 1.4436, "encoder_q-embeddings": 1592.239, "encoder_q-layer.0": 1131.4537, "encoder_q-layer.1": 1085.4861, "encoder_q-layer.10": 203.9984, "encoder_q-layer.11": 500.1685, "encoder_q-layer.2": 1149.5433, "encoder_q-layer.3": 1241.0276, "encoder_q-layer.4": 1318.96, "encoder_q-layer.5": 1251.7581, "encoder_q-layer.6": 1275.9324, "encoder_q-layer.7": 1418.6143, "encoder_q-layer.8": 1303.035, "encoder_q-layer.9": 570.09, "epoch": 0.15, "inbatch_neg_score": 0.3902, "inbatch_pos_score": 0.9868, "learning_rate": 4.7e-05, "loss": 3.6507, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.7955, "num_token_overlap": 15.7558, "num_token_query": 42.0907, "num_token_union": 68.3588, "num_word_context": 202.3793, "num_word_doc": 49.8484, "num_word_query": 31.7897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1774.2206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3882, "query_norm": 1.5585, "queue_k_norm": 1.4425, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.0907, "sent_len_1": 66.7955, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2713, "stdk": 0.0468, "stdq": 0.0437, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.6774, "doc_norm": 1.4458, "encoder_q-embeddings": 675.153, "encoder_q-layer.0": 525.791, "encoder_q-layer.1": 582.7052, "encoder_q-layer.10": 225.6026, "encoder_q-layer.11": 638.4933, "encoder_q-layer.2": 583.2147, "encoder_q-layer.3": 597.736, "encoder_q-layer.4": 596.9366, "encoder_q-layer.5": 488.837, "encoder_q-layer.6": 553.9574, "encoder_q-layer.7": 494.8066, "encoder_q-layer.8": 436.0565, "encoder_q-layer.9": 274.5784, "epoch": 0.15, "inbatch_neg_score": 0.4022, "inbatch_pos_score": 1.0205, "learning_rate": 4.6944444444444446e-05, "loss": 3.6774, "norm_diff": 0.1005, "norm_loss": 0.0, "num_token_doc": 66.6528, "num_token_overlap": 15.8863, "num_token_query": 42.5333, "num_token_union": 68.4806, "num_word_context": 202.0775, "num_word_doc": 49.7446, "num_word_query": 32.1472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 799.0885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4031, "query_norm": 1.5463, "queue_k_norm": 1.4455, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5333, "sent_len_1": 66.6528, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3963, "stdk": 0.0469, "stdq": 0.0455, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6857, "doc_norm": 1.4471, "encoder_q-embeddings": 484.7214, "encoder_q-layer.0": 355.3407, "encoder_q-layer.1": 384.8264, "encoder_q-layer.10": 229.76, "encoder_q-layer.11": 551.4348, "encoder_q-layer.2": 428.2929, "encoder_q-layer.3": 455.8882, "encoder_q-layer.4": 436.7751, "encoder_q-layer.5": 422.5442, "encoder_q-layer.6": 418.2625, "encoder_q-layer.7": 441.5476, "encoder_q-layer.8": 568.7086, "encoder_q-layer.9": 346.6866, "epoch": 0.15, "inbatch_neg_score": 0.3821, "inbatch_pos_score": 0.9688, "learning_rate": 4.6888888888888895e-05, "loss": 3.6857, "norm_diff": 0.0357, "norm_loss": 0.0, "num_token_doc": 66.6137, "num_token_overlap": 15.7379, "num_token_query": 42.1545, "num_token_union": 68.4155, "num_word_context": 202.1295, "num_word_doc": 49.7354, "num_word_query": 31.8416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 660.9358, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3811, "query_norm": 1.4828, "queue_k_norm": 1.4452, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1545, "sent_len_1": 66.6137, "sent_len_max_0": 128.0, "sent_len_max_1": 186.2925, "stdk": 0.0469, "stdq": 0.0443, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.6466, "doc_norm": 1.4501, "encoder_q-embeddings": 749.8566, "encoder_q-layer.0": 533.386, "encoder_q-layer.1": 621.6685, "encoder_q-layer.10": 201.0998, "encoder_q-layer.11": 470.1865, "encoder_q-layer.2": 689.5514, "encoder_q-layer.3": 650.0803, "encoder_q-layer.4": 626.5859, "encoder_q-layer.5": 635.375, "encoder_q-layer.6": 639.4966, "encoder_q-layer.7": 582.0992, "encoder_q-layer.8": 597.3687, "encoder_q-layer.9": 347.6339, "epoch": 0.15, "inbatch_neg_score": 0.3716, "inbatch_pos_score": 1.002, "learning_rate": 4.683333333333334e-05, "loss": 3.6466, "norm_diff": 0.0481, "norm_loss": 0.0, "num_token_doc": 66.7532, "num_token_overlap": 15.8324, "num_token_query": 42.2816, "num_token_union": 68.4469, "num_word_context": 202.4372, "num_word_doc": 49.7925, "num_word_query": 31.9252, "postclip_grad_norm": 1.0, "preclip_grad_norm": 877.8689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3713, "query_norm": 1.4982, "queue_k_norm": 1.4454, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2816, "sent_len_1": 66.7532, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2488, "stdk": 0.047, "stdq": 0.0448, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.6392, "doc_norm": 1.4509, "encoder_q-embeddings": 740.3431, "encoder_q-layer.0": 548.9786, "encoder_q-layer.1": 627.8772, "encoder_q-layer.10": 203.0147, "encoder_q-layer.11": 531.2632, "encoder_q-layer.2": 702.9664, "encoder_q-layer.3": 755.4263, "encoder_q-layer.4": 715.941, "encoder_q-layer.5": 701.4026, "encoder_q-layer.6": 728.5477, "encoder_q-layer.7": 680.5075, "encoder_q-layer.8": 636.2668, "encoder_q-layer.9": 396.1912, "epoch": 0.15, "inbatch_neg_score": 0.3521, "inbatch_pos_score": 0.9595, "learning_rate": 4.677777777777778e-05, "loss": 3.6392, "norm_diff": 0.0806, "norm_loss": 0.0, "num_token_doc": 66.8847, "num_token_overlap": 15.8746, "num_token_query": 42.546, "num_token_union": 68.6244, "num_word_context": 202.7516, "num_word_doc": 49.9298, "num_word_query": 32.1225, "postclip_grad_norm": 1.0, "preclip_grad_norm": 939.3636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3523, "query_norm": 1.5316, "queue_k_norm": 1.4462, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.546, "sent_len_1": 66.8847, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0938, "stdk": 0.0471, "stdq": 0.0446, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.6323, "doc_norm": 1.4482, "encoder_q-embeddings": 795.9142, "encoder_q-layer.0": 540.4865, "encoder_q-layer.1": 565.0173, "encoder_q-layer.10": 214.2649, "encoder_q-layer.11": 497.3477, "encoder_q-layer.2": 561.1945, "encoder_q-layer.3": 569.407, "encoder_q-layer.4": 583.8605, "encoder_q-layer.5": 541.5474, "encoder_q-layer.6": 640.2634, "encoder_q-layer.7": 691.7209, "encoder_q-layer.8": 831.3276, "encoder_q-layer.9": 396.6856, "epoch": 0.16, "inbatch_neg_score": 0.3485, "inbatch_pos_score": 0.978, "learning_rate": 4.672222222222222e-05, "loss": 3.6323, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.9904, "num_token_overlap": 15.847, "num_token_query": 42.406, "num_token_union": 68.6598, "num_word_context": 202.6325, "num_word_doc": 49.956, "num_word_query": 32.0027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 921.1897, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3464, "query_norm": 1.4961, "queue_k_norm": 1.4482, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.406, "sent_len_1": 66.9904, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9212, "stdk": 0.0471, "stdq": 0.0445, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.6415, "doc_norm": 1.443, "encoder_q-embeddings": 1322.0654, "encoder_q-layer.0": 949.7702, "encoder_q-layer.1": 936.8058, "encoder_q-layer.10": 245.7614, "encoder_q-layer.11": 570.5755, "encoder_q-layer.2": 950.0184, "encoder_q-layer.3": 1036.0665, "encoder_q-layer.4": 967.2171, "encoder_q-layer.5": 780.1869, "encoder_q-layer.6": 707.1928, "encoder_q-layer.7": 618.0438, "encoder_q-layer.8": 572.4471, "encoder_q-layer.9": 351.8169, "epoch": 0.16, "inbatch_neg_score": 0.3479, "inbatch_pos_score": 0.9419, "learning_rate": 4.666666666666667e-05, "loss": 3.6415, "norm_diff": 0.0389, "norm_loss": 0.0, "num_token_doc": 66.7908, "num_token_overlap": 15.853, "num_token_query": 42.3187, "num_token_union": 68.456, "num_word_context": 202.29, "num_word_doc": 49.8019, "num_word_query": 31.9572, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1270.5338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3447, "query_norm": 1.482, "queue_k_norm": 1.4484, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3187, "sent_len_1": 66.7908, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.3313, "stdk": 0.0469, "stdq": 0.0433, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.6321, "doc_norm": 1.4367, "encoder_q-embeddings": 289.888, "encoder_q-layer.0": 209.9121, "encoder_q-layer.1": 229.8432, "encoder_q-layer.10": 202.8674, "encoder_q-layer.11": 519.2643, "encoder_q-layer.2": 261.5074, "encoder_q-layer.3": 274.3568, "encoder_q-layer.4": 300.6447, "encoder_q-layer.5": 309.6368, "encoder_q-layer.6": 300.0648, "encoder_q-layer.7": 290.7686, "encoder_q-layer.8": 287.0991, "encoder_q-layer.9": 230.6004, "epoch": 0.16, "inbatch_neg_score": 0.3275, "inbatch_pos_score": 0.917, "learning_rate": 4.6611111111111114e-05, "loss": 3.6321, "norm_diff": 0.0215, "norm_loss": 0.0, "num_token_doc": 66.7838, "num_token_overlap": 15.8299, "num_token_query": 42.3607, "num_token_union": 68.5208, "num_word_context": 202.3484, "num_word_doc": 49.8631, "num_word_query": 31.9981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 432.5066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3286, "query_norm": 1.4561, "queue_k_norm": 1.4432, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3607, "sent_len_1": 66.7838, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3438, "stdk": 0.0468, "stdq": 0.0428, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.6078, "doc_norm": 1.4387, "encoder_q-embeddings": 764.4092, "encoder_q-layer.0": 567.2731, "encoder_q-layer.1": 581.9747, "encoder_q-layer.10": 214.7139, "encoder_q-layer.11": 588.101, "encoder_q-layer.2": 591.1583, "encoder_q-layer.3": 591.5094, "encoder_q-layer.4": 568.1868, "encoder_q-layer.5": 536.5803, "encoder_q-layer.6": 480.8484, "encoder_q-layer.7": 443.8154, "encoder_q-layer.8": 402.8752, "encoder_q-layer.9": 261.5742, "epoch": 0.16, "inbatch_neg_score": 0.3596, "inbatch_pos_score": 0.9546, "learning_rate": 4.6555555555555556e-05, "loss": 3.6078, "norm_diff": 0.055, "norm_loss": 0.0, "num_token_doc": 66.6544, "num_token_overlap": 15.8429, "num_token_query": 42.4239, "num_token_union": 68.4426, "num_word_context": 201.9432, "num_word_doc": 49.759, "num_word_query": 32.0613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 811.7412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3569, "query_norm": 1.4936, "queue_k_norm": 1.4408, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4239, "sent_len_1": 66.6544, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1675, "stdk": 0.0468, "stdq": 0.0436, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.6011, "doc_norm": 1.4397, "encoder_q-embeddings": 642.244, "encoder_q-layer.0": 490.0446, "encoder_q-layer.1": 487.1892, "encoder_q-layer.10": 207.3073, "encoder_q-layer.11": 519.2175, "encoder_q-layer.2": 514.3213, "encoder_q-layer.3": 516.1094, "encoder_q-layer.4": 486.6534, "encoder_q-layer.5": 443.9366, "encoder_q-layer.6": 476.9266, "encoder_q-layer.7": 451.968, "encoder_q-layer.8": 456.0283, "encoder_q-layer.9": 266.735, "epoch": 0.16, "inbatch_neg_score": 0.3855, "inbatch_pos_score": 1.001, "learning_rate": 4.6500000000000005e-05, "loss": 3.6011, "norm_diff": 0.0777, "norm_loss": 0.0, "num_token_doc": 67.0822, "num_token_overlap": 15.9517, "num_token_query": 42.5515, "num_token_union": 68.6703, "num_word_context": 202.5013, "num_word_doc": 50.0266, "num_word_query": 32.1469, "postclip_grad_norm": 1.0, "preclip_grad_norm": 713.7167, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3838, "query_norm": 1.5174, "queue_k_norm": 1.439, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5515, "sent_len_1": 67.0822, "sent_len_max_0": 127.9963, "sent_len_max_1": 192.1687, "stdk": 0.047, "stdq": 0.0439, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 48.7305, "active_queue_size": 16384.0, "cl_loss": 3.6253, "doc_norm": 1.4406, "encoder_q-embeddings": 2888.7888, "encoder_q-layer.0": 1943.1104, "encoder_q-layer.1": 2278.0767, "encoder_q-layer.10": 220.0882, "encoder_q-layer.11": 544.0361, "encoder_q-layer.2": 2469.1692, "encoder_q-layer.3": 2417.7881, "encoder_q-layer.4": 2237.6799, "encoder_q-layer.5": 1980.5455, "encoder_q-layer.6": 2123.6243, "encoder_q-layer.7": 1307.6627, "encoder_q-layer.8": 973.6021, "encoder_q-layer.9": 466.0422, "epoch": 0.16, "inbatch_neg_score": 0.3837, "inbatch_pos_score": 0.9878, "learning_rate": 4.644444444444445e-05, "loss": 3.6253, "norm_diff": 0.0825, "norm_loss": 0.0, "num_token_doc": 66.7465, "num_token_overlap": 15.822, "num_token_query": 42.2591, "num_token_union": 68.4422, "num_word_context": 202.3909, "num_word_doc": 49.776, "num_word_query": 31.916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2927.8315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3818, "query_norm": 1.5231, "queue_k_norm": 1.4396, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2591, "sent_len_1": 66.7465, "sent_len_max_0": 127.99, "sent_len_max_1": 189.4025, "stdk": 0.047, "stdq": 0.0445, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.6122, "doc_norm": 1.4316, "encoder_q-embeddings": 492.9006, "encoder_q-layer.0": 348.4891, "encoder_q-layer.1": 381.6311, "encoder_q-layer.10": 227.0872, "encoder_q-layer.11": 543.3285, "encoder_q-layer.2": 380.0787, "encoder_q-layer.3": 393.774, "encoder_q-layer.4": 413.9428, "encoder_q-layer.5": 390.3987, "encoder_q-layer.6": 400.7782, "encoder_q-layer.7": 374.4058, "encoder_q-layer.8": 317.9244, "encoder_q-layer.9": 238.2111, "epoch": 0.16, "inbatch_neg_score": 0.3854, "inbatch_pos_score": 0.9756, "learning_rate": 4.638888888888889e-05, "loss": 3.6122, "norm_diff": 0.0919, "norm_loss": 0.0, "num_token_doc": 66.5764, "num_token_overlap": 15.8017, "num_token_query": 42.2945, "num_token_union": 68.3863, "num_word_context": 201.8447, "num_word_doc": 49.6722, "num_word_query": 31.9489, "postclip_grad_norm": 1.0, "preclip_grad_norm": 587.2522, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3838, "query_norm": 1.5235, "queue_k_norm": 1.4386, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2945, "sent_len_1": 66.5764, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0888, "stdk": 0.0466, "stdq": 0.0443, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.5975, "doc_norm": 1.4374, "encoder_q-embeddings": 1688.5289, "encoder_q-layer.0": 1233.9553, "encoder_q-layer.1": 1405.8752, "encoder_q-layer.10": 403.1811, "encoder_q-layer.11": 933.3039, "encoder_q-layer.2": 1549.2078, "encoder_q-layer.3": 1529.9707, "encoder_q-layer.4": 1624.265, "encoder_q-layer.5": 1321.0828, "encoder_q-layer.6": 1451.4031, "encoder_q-layer.7": 1486.1793, "encoder_q-layer.8": 1600.6772, "encoder_q-layer.9": 1005.3551, "epoch": 0.16, "inbatch_neg_score": 0.3383, "inbatch_pos_score": 0.9502, "learning_rate": 4.633333333333333e-05, "loss": 3.5975, "norm_diff": 0.0316, "norm_loss": 0.0, "num_token_doc": 66.7452, "num_token_overlap": 15.8789, "num_token_query": 42.5533, "num_token_union": 68.5796, "num_word_context": 202.63, "num_word_doc": 49.8191, "num_word_query": 32.1778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2068.4862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3367, "query_norm": 1.4689, "queue_k_norm": 1.4414, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5533, "sent_len_1": 66.7452, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7125, "stdk": 0.0469, "stdq": 0.0441, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.6034, "doc_norm": 1.4345, "encoder_q-embeddings": 1752.1589, "encoder_q-layer.0": 1262.6821, "encoder_q-layer.1": 1418.4939, "encoder_q-layer.10": 390.5309, "encoder_q-layer.11": 1021.8047, "encoder_q-layer.2": 1652.812, "encoder_q-layer.3": 1632.9507, "encoder_q-layer.4": 1678.5809, "encoder_q-layer.5": 1577.032, "encoder_q-layer.6": 1685.8392, "encoder_q-layer.7": 1506.2286, "encoder_q-layer.8": 1631.9935, "encoder_q-layer.9": 820.7689, "epoch": 0.16, "inbatch_neg_score": 0.3559, "inbatch_pos_score": 0.9897, "learning_rate": 4.627777777777778e-05, "loss": 3.6034, "norm_diff": 0.0816, "norm_loss": 0.0, "num_token_doc": 66.8052, "num_token_overlap": 15.8817, "num_token_query": 42.4969, "num_token_union": 68.6004, "num_word_context": 201.9926, "num_word_doc": 49.8574, "num_word_query": 32.1091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2188.4932, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3545, "query_norm": 1.5161, "queue_k_norm": 1.4409, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4969, "sent_len_1": 66.8052, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0987, "stdk": 0.0468, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.6009, "doc_norm": 1.4434, "encoder_q-embeddings": 782.8083, "encoder_q-layer.0": 544.4692, "encoder_q-layer.1": 633.5132, "encoder_q-layer.10": 385.703, "encoder_q-layer.11": 1022.299, "encoder_q-layer.2": 703.9506, "encoder_q-layer.3": 751.5125, "encoder_q-layer.4": 741.4271, "encoder_q-layer.5": 722.5582, "encoder_q-layer.6": 704.5741, "encoder_q-layer.7": 654.5765, "encoder_q-layer.8": 640.227, "encoder_q-layer.9": 439.502, "epoch": 0.16, "inbatch_neg_score": 0.351, "inbatch_pos_score": 0.9834, "learning_rate": 4.6222222222222224e-05, "loss": 3.6009, "norm_diff": 0.0676, "norm_loss": 0.0, "num_token_doc": 66.9465, "num_token_overlap": 15.8264, "num_token_query": 42.4224, "num_token_union": 68.6263, "num_word_context": 202.4291, "num_word_doc": 49.9416, "num_word_query": 32.062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1033.2654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3508, "query_norm": 1.511, "queue_k_norm": 1.4385, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4224, "sent_len_1": 66.9465, "sent_len_max_0": 128.0, "sent_len_max_1": 189.69, "stdk": 0.0472, "stdq": 0.0449, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5895, "doc_norm": 1.4344, "encoder_q-embeddings": 1239.196, "encoder_q-layer.0": 863.7776, "encoder_q-layer.1": 921.7313, "encoder_q-layer.10": 428.1507, "encoder_q-layer.11": 989.1409, "encoder_q-layer.2": 1010.6331, "encoder_q-layer.3": 1059.0502, "encoder_q-layer.4": 1051.8989, "encoder_q-layer.5": 871.2607, "encoder_q-layer.6": 758.6506, "encoder_q-layer.7": 660.8771, "encoder_q-layer.8": 562.7551, "encoder_q-layer.9": 405.7786, "epoch": 0.16, "inbatch_neg_score": 0.3432, "inbatch_pos_score": 0.9473, "learning_rate": 4.6166666666666666e-05, "loss": 3.5895, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.8, "num_token_overlap": 15.8942, "num_token_query": 42.5103, "num_token_union": 68.5442, "num_word_context": 202.6184, "num_word_doc": 49.8364, "num_word_query": 32.1138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1320.2069, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.342, "query_norm": 1.4729, "queue_k_norm": 1.4354, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5103, "sent_len_1": 66.8, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6725, "stdk": 0.0469, "stdq": 0.0439, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.5925, "doc_norm": 1.4339, "encoder_q-embeddings": 696.7814, "encoder_q-layer.0": 470.012, "encoder_q-layer.1": 518.2905, "encoder_q-layer.10": 393.696, "encoder_q-layer.11": 952.701, "encoder_q-layer.2": 599.8771, "encoder_q-layer.3": 618.9889, "encoder_q-layer.4": 645.2815, "encoder_q-layer.5": 634.6472, "encoder_q-layer.6": 581.3182, "encoder_q-layer.7": 491.6764, "encoder_q-layer.8": 474.5974, "encoder_q-layer.9": 361.7242, "epoch": 0.17, "inbatch_neg_score": 0.3269, "inbatch_pos_score": 0.9478, "learning_rate": 4.6111111111111115e-05, "loss": 3.5925, "norm_diff": 0.0668, "norm_loss": 0.0, "num_token_doc": 66.6202, "num_token_overlap": 15.8222, "num_token_query": 42.2627, "num_token_union": 68.3237, "num_word_context": 202.0207, "num_word_doc": 49.7112, "num_word_query": 31.9212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 895.2449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3257, "query_norm": 1.5007, "queue_k_norm": 1.435, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2627, "sent_len_1": 66.6202, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.5712, "stdk": 0.0469, "stdq": 0.0457, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.6233, "doc_norm": 1.4298, "encoder_q-embeddings": 1605.0844, "encoder_q-layer.0": 1259.9294, "encoder_q-layer.1": 1356.4359, "encoder_q-layer.10": 360.1023, "encoder_q-layer.11": 940.9524, "encoder_q-layer.2": 1598.0497, "encoder_q-layer.3": 1580.0724, "encoder_q-layer.4": 1688.2457, "encoder_q-layer.5": 1306.4515, "encoder_q-layer.6": 961.1462, "encoder_q-layer.7": 839.5646, "encoder_q-layer.8": 720.2242, "encoder_q-layer.9": 430.8874, "epoch": 0.17, "inbatch_neg_score": 0.3661, "inbatch_pos_score": 0.9663, "learning_rate": 4.605555555555556e-05, "loss": 3.6233, "norm_diff": 0.043, "norm_loss": 0.0, "num_token_doc": 66.759, "num_token_overlap": 15.8262, "num_token_query": 42.2836, "num_token_union": 68.3875, "num_word_context": 202.1024, "num_word_doc": 49.7638, "num_word_query": 31.917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1830.2616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3635, "query_norm": 1.4729, "queue_k_norm": 1.4333, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2836, "sent_len_1": 66.759, "sent_len_max_0": 128.0, "sent_len_max_1": 192.5813, "stdk": 0.0467, "stdq": 0.0435, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 50.6836, "active_queue_size": 16384.0, "cl_loss": 3.576, "doc_norm": 1.4376, "encoder_q-embeddings": 1315.8489, "encoder_q-layer.0": 1023.6698, "encoder_q-layer.1": 1044.5127, "encoder_q-layer.10": 396.4876, "encoder_q-layer.11": 951.9193, "encoder_q-layer.2": 1174.3099, "encoder_q-layer.3": 1262.5985, "encoder_q-layer.4": 1216.6235, "encoder_q-layer.5": 1144.0317, "encoder_q-layer.6": 1156.8054, "encoder_q-layer.7": 1032.6324, "encoder_q-layer.8": 775.2505, "encoder_q-layer.9": 469.5708, "epoch": 0.17, "inbatch_neg_score": 0.3387, "inbatch_pos_score": 0.9624, "learning_rate": 4.600000000000001e-05, "loss": 3.576, "norm_diff": 0.0526, "norm_loss": 0.0, "num_token_doc": 66.7774, "num_token_overlap": 15.8225, "num_token_query": 42.3073, "num_token_union": 68.4462, "num_word_context": 202.3125, "num_word_doc": 49.8103, "num_word_query": 31.9565, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1543.4189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3384, "query_norm": 1.4902, "queue_k_norm": 1.4317, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3073, "sent_len_1": 66.7774, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.1538, "stdk": 0.047, "stdq": 0.0454, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.5872, "doc_norm": 1.4348, "encoder_q-embeddings": 1349.0122, "encoder_q-layer.0": 964.9605, "encoder_q-layer.1": 1064.3611, "encoder_q-layer.10": 414.1514, "encoder_q-layer.11": 958.9268, "encoder_q-layer.2": 1273.3228, "encoder_q-layer.3": 1349.5393, "encoder_q-layer.4": 1459.0061, "encoder_q-layer.5": 1344.3345, "encoder_q-layer.6": 1308.9158, "encoder_q-layer.7": 1093.1432, "encoder_q-layer.8": 762.5334, "encoder_q-layer.9": 444.314, "epoch": 0.17, "inbatch_neg_score": 0.3305, "inbatch_pos_score": 0.9238, "learning_rate": 4.594444444444444e-05, "loss": 3.5872, "norm_diff": 0.0111, "norm_loss": 0.0, "num_token_doc": 66.7515, "num_token_overlap": 15.8373, "num_token_query": 42.2554, "num_token_union": 68.4221, "num_word_context": 202.3592, "num_word_doc": 49.8213, "num_word_query": 31.9324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1634.591, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3296, "query_norm": 1.4405, "queue_k_norm": 1.4322, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2554, "sent_len_1": 66.7515, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.0513, "stdk": 0.0469, "stdq": 0.0432, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.5868, "doc_norm": 1.4297, "encoder_q-embeddings": 1289.657, "encoder_q-layer.0": 960.8118, "encoder_q-layer.1": 1005.2467, "encoder_q-layer.10": 390.8669, "encoder_q-layer.11": 959.0228, "encoder_q-layer.2": 1044.5585, "encoder_q-layer.3": 1002.1525, "encoder_q-layer.4": 886.0569, "encoder_q-layer.5": 781.4379, "encoder_q-layer.6": 756.9587, "encoder_q-layer.7": 715.8412, "encoder_q-layer.8": 600.8575, "encoder_q-layer.9": 406.9811, "epoch": 0.17, "inbatch_neg_score": 0.326, "inbatch_pos_score": 0.9409, "learning_rate": 4.588888888888889e-05, "loss": 3.5868, "norm_diff": 0.044, "norm_loss": 0.0, "num_token_doc": 66.839, "num_token_overlap": 15.878, "num_token_query": 42.3773, "num_token_union": 68.4855, "num_word_context": 202.3774, "num_word_doc": 49.8982, "num_word_query": 32.0192, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1340.924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3242, "query_norm": 1.4738, "queue_k_norm": 1.4308, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3773, "sent_len_1": 66.839, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.0337, "stdk": 0.0468, "stdq": 0.0447, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.5869, "doc_norm": 1.4249, "encoder_q-embeddings": 961.5482, "encoder_q-layer.0": 649.1932, "encoder_q-layer.1": 712.1804, "encoder_q-layer.10": 435.3288, "encoder_q-layer.11": 938.9821, "encoder_q-layer.2": 821.8504, "encoder_q-layer.3": 928.0585, "encoder_q-layer.4": 1015.4817, "encoder_q-layer.5": 824.3787, "encoder_q-layer.6": 714.2203, "encoder_q-layer.7": 576.1783, "encoder_q-layer.8": 517.5993, "encoder_q-layer.9": 416.7428, "epoch": 0.17, "inbatch_neg_score": 0.3184, "inbatch_pos_score": 0.9258, "learning_rate": 4.5833333333333334e-05, "loss": 3.5869, "norm_diff": 0.0425, "norm_loss": 0.0, "num_token_doc": 66.9012, "num_token_overlap": 15.7976, "num_token_query": 42.3646, "num_token_union": 68.5969, "num_word_context": 202.4301, "num_word_doc": 49.8977, "num_word_query": 31.9945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1132.7676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3179, "query_norm": 1.4674, "queue_k_norm": 1.4315, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3646, "sent_len_1": 66.9012, "sent_len_max_0": 128.0, "sent_len_max_1": 190.425, "stdk": 0.0467, "stdq": 0.0446, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.6037, "doc_norm": 1.437, "encoder_q-embeddings": 959.0586, "encoder_q-layer.0": 669.1505, "encoder_q-layer.1": 701.0209, "encoder_q-layer.10": 374.7935, "encoder_q-layer.11": 913.5945, "encoder_q-layer.2": 761.3445, "encoder_q-layer.3": 784.9818, "encoder_q-layer.4": 771.4214, "encoder_q-layer.5": 782.6383, "encoder_q-layer.6": 807.5172, "encoder_q-layer.7": 707.1978, "encoder_q-layer.8": 542.726, "encoder_q-layer.9": 390.068, "epoch": 0.17, "inbatch_neg_score": 0.3434, "inbatch_pos_score": 0.9604, "learning_rate": 4.577777777777778e-05, "loss": 3.6037, "norm_diff": 0.0241, "norm_loss": 0.0, "num_token_doc": 66.6085, "num_token_overlap": 15.8386, "num_token_query": 42.4228, "num_token_union": 68.4448, "num_word_context": 202.265, "num_word_doc": 49.7261, "num_word_query": 32.0397, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1101.6698, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3418, "query_norm": 1.4612, "queue_k_norm": 1.4293, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4228, "sent_len_1": 66.6085, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.375, "stdk": 0.0471, "stdq": 0.044, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.6025, "doc_norm": 1.4288, "encoder_q-embeddings": 1313.6871, "encoder_q-layer.0": 913.517, "encoder_q-layer.1": 1034.6689, "encoder_q-layer.10": 396.907, "encoder_q-layer.11": 997.409, "encoder_q-layer.2": 1277.0358, "encoder_q-layer.3": 1474.5924, "encoder_q-layer.4": 1204.4874, "encoder_q-layer.5": 1171.2109, "encoder_q-layer.6": 1138.6396, "encoder_q-layer.7": 1131.1987, "encoder_q-layer.8": 867.9962, "encoder_q-layer.9": 553.3737, "epoch": 0.17, "inbatch_neg_score": 0.3458, "inbatch_pos_score": 0.9624, "learning_rate": 4.572222222222222e-05, "loss": 3.6025, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.8209, "num_token_overlap": 15.7929, "num_token_query": 42.3647, "num_token_union": 68.588, "num_word_context": 202.5711, "num_word_doc": 49.8838, "num_word_query": 32.0108, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1633.6494, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.343, "query_norm": 1.4822, "queue_k_norm": 1.4307, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3647, "sent_len_1": 66.8209, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.685, "stdk": 0.0468, "stdq": 0.0445, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.5743, "doc_norm": 1.4327, "encoder_q-embeddings": 1131.5072, "encoder_q-layer.0": 814.6004, "encoder_q-layer.1": 895.6871, "encoder_q-layer.10": 416.5865, "encoder_q-layer.11": 1018.8691, "encoder_q-layer.2": 964.2043, "encoder_q-layer.3": 932.9042, "encoder_q-layer.4": 861.5437, "encoder_q-layer.5": 783.8425, "encoder_q-layer.6": 822.2311, "encoder_q-layer.7": 811.8021, "encoder_q-layer.8": 876.8247, "encoder_q-layer.9": 532.8738, "epoch": 0.17, "inbatch_neg_score": 0.3421, "inbatch_pos_score": 0.9683, "learning_rate": 4.566666666666667e-05, "loss": 3.5743, "norm_diff": 0.0771, "norm_loss": 0.0, "num_token_doc": 67.2151, "num_token_overlap": 15.9258, "num_token_query": 42.5597, "num_token_union": 68.79, "num_word_context": 203.0972, "num_word_doc": 50.1627, "num_word_query": 32.1701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1314.9849, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3423, "query_norm": 1.5098, "queue_k_norm": 1.4306, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5597, "sent_len_1": 67.2151, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5462, "stdk": 0.047, "stdq": 0.0455, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.6111, "doc_norm": 1.4315, "encoder_q-embeddings": 1115.2931, "encoder_q-layer.0": 852.2836, "encoder_q-layer.1": 895.5085, "encoder_q-layer.10": 370.1819, "encoder_q-layer.11": 945.5392, "encoder_q-layer.2": 1040.0896, "encoder_q-layer.3": 1106.0568, "encoder_q-layer.4": 1099.7023, "encoder_q-layer.5": 1092.1254, "encoder_q-layer.6": 1012.6152, "encoder_q-layer.7": 923.4977, "encoder_q-layer.8": 800.7202, "encoder_q-layer.9": 463.6799, "epoch": 0.17, "inbatch_neg_score": 0.3245, "inbatch_pos_score": 0.9409, "learning_rate": 4.561111111111112e-05, "loss": 3.6111, "norm_diff": 0.0265, "norm_loss": 0.0, "num_token_doc": 66.8043, "num_token_overlap": 15.7921, "num_token_query": 42.2324, "num_token_union": 68.4781, "num_word_context": 202.0657, "num_word_doc": 49.8255, "num_word_query": 31.8891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1384.1653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3228, "query_norm": 1.458, "queue_k_norm": 1.4288, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2324, "sent_len_1": 66.8043, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5925, "stdk": 0.0469, "stdq": 0.0441, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.5921, "doc_norm": 1.4278, "encoder_q-embeddings": 952.8395, "encoder_q-layer.0": 668.974, "encoder_q-layer.1": 692.0582, "encoder_q-layer.10": 410.1049, "encoder_q-layer.11": 1090.6775, "encoder_q-layer.2": 794.5284, "encoder_q-layer.3": 799.6236, "encoder_q-layer.4": 790.0359, "encoder_q-layer.5": 732.8137, "encoder_q-layer.6": 821.9344, "encoder_q-layer.7": 705.9232, "encoder_q-layer.8": 556.7159, "encoder_q-layer.9": 425.4374, "epoch": 0.18, "inbatch_neg_score": 0.3538, "inbatch_pos_score": 0.9561, "learning_rate": 4.555555555555556e-05, "loss": 3.5921, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.5198, "num_token_overlap": 15.8316, "num_token_query": 42.3136, "num_token_union": 68.298, "num_word_context": 202.0683, "num_word_doc": 49.6096, "num_word_query": 31.9668, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1122.1749, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3525, "query_norm": 1.4757, "queue_k_norm": 1.4287, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3136, "sent_len_1": 66.5198, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.145, "stdk": 0.0468, "stdq": 0.0437, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6115, "doc_norm": 1.433, "encoder_q-embeddings": 1078.5332, "encoder_q-layer.0": 881.2341, "encoder_q-layer.1": 855.7496, "encoder_q-layer.10": 370.316, "encoder_q-layer.11": 974.3949, "encoder_q-layer.2": 865.1754, "encoder_q-layer.3": 844.459, "encoder_q-layer.4": 848.5945, "encoder_q-layer.5": 852.626, "encoder_q-layer.6": 898.932, "encoder_q-layer.7": 837.2654, "encoder_q-layer.8": 728.4661, "encoder_q-layer.9": 458.0752, "epoch": 0.18, "inbatch_neg_score": 0.3482, "inbatch_pos_score": 0.9487, "learning_rate": 4.55e-05, "loss": 3.6115, "norm_diff": 0.0274, "norm_loss": 0.0, "num_token_doc": 66.7405, "num_token_overlap": 15.8044, "num_token_query": 42.3222, "num_token_union": 68.4784, "num_word_context": 202.3063, "num_word_doc": 49.8079, "num_word_query": 31.9763, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1253.8575, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3469, "query_norm": 1.4604, "queue_k_norm": 1.4273, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3222, "sent_len_1": 66.7405, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.8625, "stdk": 0.047, "stdq": 0.0439, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.5801, "doc_norm": 1.4256, "encoder_q-embeddings": 2339.0442, "encoder_q-layer.0": 1681.0601, "encoder_q-layer.1": 1777.3472, "encoder_q-layer.10": 449.598, "encoder_q-layer.11": 991.2955, "encoder_q-layer.2": 1974.4479, "encoder_q-layer.3": 1910.9711, "encoder_q-layer.4": 1826.3973, "encoder_q-layer.5": 2052.0449, "encoder_q-layer.6": 1751.4703, "encoder_q-layer.7": 1941.3229, "encoder_q-layer.8": 2057.6033, "encoder_q-layer.9": 990.5076, "epoch": 0.18, "inbatch_neg_score": 0.3445, "inbatch_pos_score": 0.9404, "learning_rate": 4.5444444444444444e-05, "loss": 3.5801, "norm_diff": 0.0194, "norm_loss": 0.0, "num_token_doc": 66.8074, "num_token_overlap": 15.8611, "num_token_query": 42.4606, "num_token_union": 68.5604, "num_word_context": 202.2435, "num_word_doc": 49.8345, "num_word_query": 32.0951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2678.8931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.344, "query_norm": 1.4451, "queue_k_norm": 1.4277, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4606, "sent_len_1": 66.8074, "sent_len_max_0": 127.9887, "sent_len_max_1": 191.2837, "stdk": 0.0466, "stdq": 0.0436, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.6223, "doc_norm": 1.4318, "encoder_q-embeddings": 3313.3923, "encoder_q-layer.0": 2561.7954, "encoder_q-layer.1": 2731.2705, "encoder_q-layer.10": 450.2909, "encoder_q-layer.11": 1089.1791, "encoder_q-layer.2": 2770.6736, "encoder_q-layer.3": 2815.511, "encoder_q-layer.4": 2488.6919, "encoder_q-layer.5": 2178.7466, "encoder_q-layer.6": 2060.345, "encoder_q-layer.7": 2065.5552, "encoder_q-layer.8": 1660.4171, "encoder_q-layer.9": 841.8031, "epoch": 0.18, "inbatch_neg_score": 0.3438, "inbatch_pos_score": 0.918, "learning_rate": 4.538888888888889e-05, "loss": 3.6223, "norm_diff": 0.0118, "norm_loss": 0.0, "num_token_doc": 67.0549, "num_token_overlap": 15.7567, "num_token_query": 42.4389, "num_token_union": 68.7139, "num_word_context": 202.4185, "num_word_doc": 49.9955, "num_word_query": 32.0632, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3441.6924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3428, "query_norm": 1.4348, "queue_k_norm": 1.4274, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4389, "sent_len_1": 67.0549, "sent_len_max_0": 128.0, "sent_len_max_1": 189.58, "stdk": 0.0469, "stdq": 0.0431, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.5831, "doc_norm": 1.4275, "encoder_q-embeddings": 860.3828, "encoder_q-layer.0": 591.6896, "encoder_q-layer.1": 646.6881, "encoder_q-layer.10": 393.1041, "encoder_q-layer.11": 994.2433, "encoder_q-layer.2": 736.3335, "encoder_q-layer.3": 705.4342, "encoder_q-layer.4": 691.464, "encoder_q-layer.5": 639.7921, "encoder_q-layer.6": 610.9686, "encoder_q-layer.7": 567.1425, "encoder_q-layer.8": 588.8643, "encoder_q-layer.9": 430.251, "epoch": 0.18, "inbatch_neg_score": 0.3246, "inbatch_pos_score": 0.9224, "learning_rate": 4.5333333333333335e-05, "loss": 3.5831, "norm_diff": 0.02, "norm_loss": 0.0, "num_token_doc": 67.0069, "num_token_overlap": 15.8582, "num_token_query": 42.4308, "num_token_union": 68.691, "num_word_context": 202.5942, "num_word_doc": 50.0305, "num_word_query": 32.0433, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1022.067, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3247, "query_norm": 1.4411, "queue_k_norm": 1.4283, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4308, "sent_len_1": 67.0069, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.625, "stdk": 0.0468, "stdq": 0.0437, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.5829, "doc_norm": 1.4284, "encoder_q-embeddings": 8306.8213, "encoder_q-layer.0": 5350.2715, "encoder_q-layer.1": 4133.1562, "encoder_q-layer.10": 382.9446, "encoder_q-layer.11": 954.5805, "encoder_q-layer.2": 4156.4019, "encoder_q-layer.3": 4203.9043, "encoder_q-layer.4": 4734.2568, "encoder_q-layer.5": 4188.6543, "encoder_q-layer.6": 3543.821, "encoder_q-layer.7": 2616.2798, "encoder_q-layer.8": 2316.5605, "encoder_q-layer.9": 1051.295, "epoch": 0.18, "inbatch_neg_score": 0.3366, "inbatch_pos_score": 0.9487, "learning_rate": 4.527777777777778e-05, "loss": 3.5829, "norm_diff": 0.026, "norm_loss": 0.0, "num_token_doc": 66.7311, "num_token_overlap": 15.8201, "num_token_query": 42.2968, "num_token_union": 68.4612, "num_word_context": 202.4089, "num_word_doc": 49.7979, "num_word_query": 31.9466, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6746.0554, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3364, "query_norm": 1.4544, "queue_k_norm": 1.4273, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2968, "sent_len_1": 66.7311, "sent_len_max_0": 128.0, "sent_len_max_1": 188.625, "stdk": 0.0468, "stdq": 0.0442, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6099, "doc_norm": 1.4261, "encoder_q-embeddings": 1530.4965, "encoder_q-layer.0": 1046.3708, "encoder_q-layer.1": 1121.1531, "encoder_q-layer.10": 800.0857, "encoder_q-layer.11": 2058.9438, "encoder_q-layer.2": 1162.5875, "encoder_q-layer.3": 1277.481, "encoder_q-layer.4": 1229.0793, "encoder_q-layer.5": 1205.6686, "encoder_q-layer.6": 1229.4586, "encoder_q-layer.7": 1181.9797, "encoder_q-layer.8": 1151.8102, "encoder_q-layer.9": 831.6967, "epoch": 0.18, "inbatch_neg_score": 0.3463, "inbatch_pos_score": 0.9551, "learning_rate": 4.522222222222223e-05, "loss": 3.6099, "norm_diff": 0.0392, "norm_loss": 0.0, "num_token_doc": 66.7746, "num_token_overlap": 15.7758, "num_token_query": 42.2409, "num_token_union": 68.4719, "num_word_context": 202.3138, "num_word_doc": 49.7524, "num_word_query": 31.8894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1934.2706, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3457, "query_norm": 1.4653, "queue_k_norm": 1.4275, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2409, "sent_len_1": 66.7746, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.9313, "stdk": 0.0467, "stdq": 0.0451, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.6173, "doc_norm": 1.4154, "encoder_q-embeddings": 2592.8159, "encoder_q-layer.0": 1880.7739, "encoder_q-layer.1": 1884.1881, "encoder_q-layer.10": 414.0011, "encoder_q-layer.11": 987.2679, "encoder_q-layer.2": 2043.6427, "encoder_q-layer.3": 2265.9817, "encoder_q-layer.4": 2232.9709, "encoder_q-layer.5": 2159.5417, "encoder_q-layer.6": 1797.4336, "encoder_q-layer.7": 1762.7358, "encoder_q-layer.8": 1235.0251, "encoder_q-layer.9": 513.2761, "epoch": 0.18, "inbatch_neg_score": 0.3352, "inbatch_pos_score": 0.9297, "learning_rate": 4.516666666666667e-05, "loss": 3.6173, "norm_diff": 0.0497, "norm_loss": 0.0, "num_token_doc": 66.6732, "num_token_overlap": 15.7985, "num_token_query": 42.416, "num_token_union": 68.5031, "num_word_context": 202.4404, "num_word_doc": 49.7383, "num_word_query": 32.0584, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2778.5501, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.335, "query_norm": 1.465, "queue_k_norm": 1.4273, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.416, "sent_len_1": 66.6732, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9563, "stdk": 0.0463, "stdq": 0.0448, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.5767, "doc_norm": 1.4261, "encoder_q-embeddings": 957.83, "encoder_q-layer.0": 670.7422, "encoder_q-layer.1": 743.0326, "encoder_q-layer.10": 411.9218, "encoder_q-layer.11": 1071.9236, "encoder_q-layer.2": 803.7259, "encoder_q-layer.3": 903.327, "encoder_q-layer.4": 856.08, "encoder_q-layer.5": 779.5385, "encoder_q-layer.6": 824.3297, "encoder_q-layer.7": 820.003, "encoder_q-layer.8": 746.4066, "encoder_q-layer.9": 443.8858, "epoch": 0.18, "inbatch_neg_score": 0.3169, "inbatch_pos_score": 0.9258, "learning_rate": 4.511111111111112e-05, "loss": 3.5767, "norm_diff": 0.0115, "norm_loss": 0.0, "num_token_doc": 66.7475, "num_token_overlap": 15.8744, "num_token_query": 42.4093, "num_token_union": 68.5213, "num_word_context": 202.3453, "num_word_doc": 49.8135, "num_word_query": 32.0832, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1198.385, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3162, "query_norm": 1.4337, "queue_k_norm": 1.4272, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4093, "sent_len_1": 66.7475, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9863, "stdk": 0.0467, "stdq": 0.0437, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5846, "doc_norm": 1.4221, "encoder_q-embeddings": 887.3458, "encoder_q-layer.0": 659.3208, "encoder_q-layer.1": 674.3492, "encoder_q-layer.10": 341.549, "encoder_q-layer.11": 903.5286, "encoder_q-layer.2": 758.1392, "encoder_q-layer.3": 698.5262, "encoder_q-layer.4": 676.4344, "encoder_q-layer.5": 616.1525, "encoder_q-layer.6": 628.089, "encoder_q-layer.7": 616.1518, "encoder_q-layer.8": 524.3536, "encoder_q-layer.9": 396.5403, "epoch": 0.18, "inbatch_neg_score": 0.3136, "inbatch_pos_score": 0.9341, "learning_rate": 4.5055555555555554e-05, "loss": 3.5846, "norm_diff": 0.0549, "norm_loss": 0.0, "num_token_doc": 66.6352, "num_token_overlap": 15.8055, "num_token_query": 42.3899, "num_token_union": 68.4468, "num_word_context": 202.3674, "num_word_doc": 49.6905, "num_word_query": 31.9907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1013.9018, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3132, "query_norm": 1.477, "queue_k_norm": 1.4237, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3899, "sent_len_1": 66.6352, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1675, "stdk": 0.0466, "stdq": 0.0446, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 50.1953, "active_queue_size": 16384.0, "cl_loss": 3.5944, "doc_norm": 1.4221, "encoder_q-embeddings": 629.3882, "encoder_q-layer.0": 429.6188, "encoder_q-layer.1": 452.5778, "encoder_q-layer.10": 362.167, "encoder_q-layer.11": 928.9244, "encoder_q-layer.2": 463.8291, "encoder_q-layer.3": 464.3407, "encoder_q-layer.4": 486.9811, "encoder_q-layer.5": 476.1086, "encoder_q-layer.6": 491.3627, "encoder_q-layer.7": 442.6024, "encoder_q-layer.8": 435.9613, "encoder_q-layer.9": 360.1801, "epoch": 0.19, "inbatch_neg_score": 0.3227, "inbatch_pos_score": 0.937, "learning_rate": 4.5e-05, "loss": 3.5944, "norm_diff": 0.0636, "norm_loss": 0.0, "num_token_doc": 66.8726, "num_token_overlap": 15.7339, "num_token_query": 42.0614, "num_token_union": 68.4646, "num_word_context": 202.3304, "num_word_doc": 49.8739, "num_word_query": 31.7632, "postclip_grad_norm": 1.0, "preclip_grad_norm": 801.1609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3237, "query_norm": 1.4857, "queue_k_norm": 1.4235, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.0614, "sent_len_1": 66.8726, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5225, "stdk": 0.0467, "stdq": 0.0448, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.6006, "doc_norm": 1.4292, "encoder_q-embeddings": 3157.9209, "encoder_q-layer.0": 2274.2253, "encoder_q-layer.1": 2526.2461, "encoder_q-layer.10": 427.5291, "encoder_q-layer.11": 1013.6812, "encoder_q-layer.2": 2461.0847, "encoder_q-layer.3": 2080.866, "encoder_q-layer.4": 1960.9528, "encoder_q-layer.5": 1696.7639, "encoder_q-layer.6": 1508.6233, "encoder_q-layer.7": 1277.8521, "encoder_q-layer.8": 1156.5458, "encoder_q-layer.9": 629.9006, "epoch": 0.19, "inbatch_neg_score": 0.311, "inbatch_pos_score": 0.918, "learning_rate": 4.4944444444444445e-05, "loss": 3.6006, "norm_diff": 0.0426, "norm_loss": 0.0, "num_token_doc": 66.7845, "num_token_overlap": 15.8191, "num_token_query": 42.3868, "num_token_union": 68.5146, "num_word_context": 202.6361, "num_word_doc": 49.8097, "num_word_query": 32.0169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2887.5623, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3101, "query_norm": 1.4717, "queue_k_norm": 1.4241, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3868, "sent_len_1": 66.7845, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8688, "stdk": 0.0469, "stdq": 0.0448, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.6062, "doc_norm": 1.4195, "encoder_q-embeddings": 3829.8445, "encoder_q-layer.0": 3021.738, "encoder_q-layer.1": 3134.9282, "encoder_q-layer.10": 363.554, "encoder_q-layer.11": 902.6074, "encoder_q-layer.2": 3599.2515, "encoder_q-layer.3": 3479.5322, "encoder_q-layer.4": 3385.3794, "encoder_q-layer.5": 2868.113, "encoder_q-layer.6": 3276.2102, "encoder_q-layer.7": 2643.8167, "encoder_q-layer.8": 1627.7115, "encoder_q-layer.9": 749.7291, "epoch": 0.19, "inbatch_neg_score": 0.3221, "inbatch_pos_score": 0.9414, "learning_rate": 4.4888888888888894e-05, "loss": 3.6062, "norm_diff": 0.0372, "norm_loss": 0.0, "num_token_doc": 66.8592, "num_token_overlap": 15.8558, "num_token_query": 42.4098, "num_token_union": 68.5594, "num_word_context": 202.1511, "num_word_doc": 49.8804, "num_word_query": 32.0205, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4156.5632, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.321, "query_norm": 1.4567, "queue_k_norm": 1.4222, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4098, "sent_len_1": 66.8592, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.33, "stdk": 0.0466, "stdq": 0.0445, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.5757, "doc_norm": 1.4228, "encoder_q-embeddings": 1400.2633, "encoder_q-layer.0": 1014.7714, "encoder_q-layer.1": 1197.793, "encoder_q-layer.10": 402.8197, "encoder_q-layer.11": 930.7174, "encoder_q-layer.2": 1261.2423, "encoder_q-layer.3": 1345.7513, "encoder_q-layer.4": 1433.5984, "encoder_q-layer.5": 1323.8365, "encoder_q-layer.6": 1312.3602, "encoder_q-layer.7": 1063.1138, "encoder_q-layer.8": 1084.1775, "encoder_q-layer.9": 582.9384, "epoch": 0.19, "inbatch_neg_score": 0.3334, "inbatch_pos_score": 0.9321, "learning_rate": 4.483333333333333e-05, "loss": 3.5757, "norm_diff": 0.0454, "norm_loss": 0.0, "num_token_doc": 66.762, "num_token_overlap": 15.8452, "num_token_query": 42.3551, "num_token_union": 68.5071, "num_word_context": 202.4372, "num_word_doc": 49.8408, "num_word_query": 31.9904, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1729.3134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3313, "query_norm": 1.4682, "queue_k_norm": 1.4217, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3551, "sent_len_1": 66.762, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5225, "stdk": 0.0467, "stdq": 0.0446, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.578, "doc_norm": 1.4203, "encoder_q-embeddings": 1047.0977, "encoder_q-layer.0": 709.335, "encoder_q-layer.1": 796.7609, "encoder_q-layer.10": 370.2869, "encoder_q-layer.11": 893.6003, "encoder_q-layer.2": 920.2996, "encoder_q-layer.3": 848.5121, "encoder_q-layer.4": 901.5125, "encoder_q-layer.5": 711.6689, "encoder_q-layer.6": 678.779, "encoder_q-layer.7": 595.2001, "encoder_q-layer.8": 539.9556, "encoder_q-layer.9": 374.4318, "epoch": 0.19, "inbatch_neg_score": 0.3214, "inbatch_pos_score": 0.9248, "learning_rate": 4.477777777777778e-05, "loss": 3.578, "norm_diff": 0.0336, "norm_loss": 0.0, "num_token_doc": 66.5263, "num_token_overlap": 15.748, "num_token_query": 42.2277, "num_token_union": 68.321, "num_word_context": 201.9471, "num_word_doc": 49.6335, "num_word_query": 31.8971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1159.1317, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3201, "query_norm": 1.4539, "queue_k_norm": 1.4221, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2277, "sent_len_1": 66.5263, "sent_len_max_0": 127.995, "sent_len_max_1": 190.02, "stdk": 0.0467, "stdq": 0.0445, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5828, "doc_norm": 1.4244, "encoder_q-embeddings": 843.991, "encoder_q-layer.0": 640.8661, "encoder_q-layer.1": 700.5809, "encoder_q-layer.10": 377.804, "encoder_q-layer.11": 831.785, "encoder_q-layer.2": 807.7769, "encoder_q-layer.3": 900.2427, "encoder_q-layer.4": 908.3739, "encoder_q-layer.5": 913.1625, "encoder_q-layer.6": 886.136, "encoder_q-layer.7": 825.3527, "encoder_q-layer.8": 757.7303, "encoder_q-layer.9": 445.8506, "epoch": 0.19, "inbatch_neg_score": 0.2942, "inbatch_pos_score": 0.9004, "learning_rate": 4.472222222222223e-05, "loss": 3.5828, "norm_diff": 0.0309, "norm_loss": 0.0, "num_token_doc": 66.7035, "num_token_overlap": 15.801, "num_token_query": 42.3418, "num_token_union": 68.4641, "num_word_context": 202.3327, "num_word_doc": 49.7631, "num_word_query": 31.9829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1147.8053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2922, "query_norm": 1.4553, "queue_k_norm": 1.4205, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3418, "sent_len_1": 66.7035, "sent_len_max_0": 128.0, "sent_len_max_1": 190.13, "stdk": 0.0469, "stdq": 0.0449, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.573, "doc_norm": 1.4275, "encoder_q-embeddings": 2741.5718, "encoder_q-layer.0": 1885.8536, "encoder_q-layer.1": 2029.0186, "encoder_q-layer.10": 384.6391, "encoder_q-layer.11": 955.2791, "encoder_q-layer.2": 2074.3955, "encoder_q-layer.3": 1985.8308, "encoder_q-layer.4": 1834.6807, "encoder_q-layer.5": 1695.2963, "encoder_q-layer.6": 1866.7964, "encoder_q-layer.7": 1827.5305, "encoder_q-layer.8": 1787.0776, "encoder_q-layer.9": 812.7854, "epoch": 0.19, "inbatch_neg_score": 0.3328, "inbatch_pos_score": 0.9619, "learning_rate": 4.466666666666667e-05, "loss": 3.573, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.6502, "num_token_overlap": 15.8246, "num_token_query": 42.2891, "num_token_union": 68.3962, "num_word_context": 202.3733, "num_word_doc": 49.7401, "num_word_query": 31.9377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2790.4087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3325, "query_norm": 1.4881, "queue_k_norm": 1.4232, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2891, "sent_len_1": 66.6502, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.4475, "stdk": 0.047, "stdq": 0.0454, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5628, "doc_norm": 1.4288, "encoder_q-embeddings": 771.9568, "encoder_q-layer.0": 551.5883, "encoder_q-layer.1": 597.2137, "encoder_q-layer.10": 385.0913, "encoder_q-layer.11": 951.8492, "encoder_q-layer.2": 723.163, "encoder_q-layer.3": 783.3651, "encoder_q-layer.4": 729.8311, "encoder_q-layer.5": 678.8533, "encoder_q-layer.6": 683.5266, "encoder_q-layer.7": 644.8151, "encoder_q-layer.8": 535.8075, "encoder_q-layer.9": 391.3688, "epoch": 0.19, "inbatch_neg_score": 0.3349, "inbatch_pos_score": 0.9585, "learning_rate": 4.461111111111111e-05, "loss": 3.5628, "norm_diff": 0.0474, "norm_loss": 0.0, "num_token_doc": 66.8496, "num_token_overlap": 15.839, "num_token_query": 42.2183, "num_token_union": 68.4118, "num_word_context": 202.3581, "num_word_doc": 49.8813, "num_word_query": 31.8797, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1005.1742, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3342, "query_norm": 1.4762, "queue_k_norm": 1.4234, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2183, "sent_len_1": 66.8496, "sent_len_max_0": 127.9825, "sent_len_max_1": 190.1387, "stdk": 0.0471, "stdq": 0.0451, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5786, "doc_norm": 1.4205, "encoder_q-embeddings": 555.3878, "encoder_q-layer.0": 363.2758, "encoder_q-layer.1": 390.1583, "encoder_q-layer.10": 372.5174, "encoder_q-layer.11": 904.0825, "encoder_q-layer.2": 447.7868, "encoder_q-layer.3": 504.4017, "encoder_q-layer.4": 514.7632, "encoder_q-layer.5": 560.3217, "encoder_q-layer.6": 592.1916, "encoder_q-layer.7": 523.2319, "encoder_q-layer.8": 439.8703, "encoder_q-layer.9": 331.6179, "epoch": 0.19, "inbatch_neg_score": 0.3194, "inbatch_pos_score": 0.9326, "learning_rate": 4.4555555555555555e-05, "loss": 3.5786, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.6749, "num_token_overlap": 15.7945, "num_token_query": 42.2398, "num_token_union": 68.4672, "num_word_context": 202.1485, "num_word_doc": 49.7899, "num_word_query": 31.9268, "postclip_grad_norm": 1.0, "preclip_grad_norm": 787.8501, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3179, "query_norm": 1.4324, "queue_k_norm": 1.4225, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2398, "sent_len_1": 66.6749, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4238, "stdk": 0.0467, "stdq": 0.0438, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.5587, "doc_norm": 1.4244, "encoder_q-embeddings": 1583.0958, "encoder_q-layer.0": 1064.6381, "encoder_q-layer.1": 1182.2848, "encoder_q-layer.10": 389.7508, "encoder_q-layer.11": 868.1074, "encoder_q-layer.2": 1287.9203, "encoder_q-layer.3": 1401.8018, "encoder_q-layer.4": 1406.9093, "encoder_q-layer.5": 1625.3352, "encoder_q-layer.6": 1404.5251, "encoder_q-layer.7": 1107.5027, "encoder_q-layer.8": 833.1975, "encoder_q-layer.9": 439.0599, "epoch": 0.19, "inbatch_neg_score": 0.2955, "inbatch_pos_score": 0.9185, "learning_rate": 4.4500000000000004e-05, "loss": 3.5587, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.9496, "num_token_overlap": 15.8688, "num_token_query": 42.3705, "num_token_union": 68.611, "num_word_context": 202.2541, "num_word_doc": 49.9615, "num_word_query": 32.0051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1786.4915, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2959, "query_norm": 1.4639, "queue_k_norm": 1.4228, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3705, "sent_len_1": 66.9496, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.2763, "stdk": 0.0469, "stdq": 0.0453, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.5806, "doc_norm": 1.422, "encoder_q-embeddings": 974.9955, "encoder_q-layer.0": 671.1716, "encoder_q-layer.1": 753.8429, "encoder_q-layer.10": 350.1544, "encoder_q-layer.11": 859.2075, "encoder_q-layer.2": 831.7155, "encoder_q-layer.3": 850.7289, "encoder_q-layer.4": 742.6629, "encoder_q-layer.5": 709.7758, "encoder_q-layer.6": 726.9589, "encoder_q-layer.7": 765.2865, "encoder_q-layer.8": 742.2153, "encoder_q-layer.9": 460.8895, "epoch": 0.2, "inbatch_neg_score": 0.3298, "inbatch_pos_score": 0.9497, "learning_rate": 4.4444444444444447e-05, "loss": 3.5806, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.6659, "num_token_overlap": 15.804, "num_token_query": 42.0997, "num_token_union": 68.3124, "num_word_context": 202.0282, "num_word_doc": 49.7927, "num_word_query": 31.7849, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1127.9048, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3284, "query_norm": 1.4595, "queue_k_norm": 1.4205, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.0997, "sent_len_1": 66.6659, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.0762, "stdk": 0.0468, "stdq": 0.0444, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 27.5408, "dev_samples_per_second": 2.324, "dev_steps_per_second": 0.036, "epoch": 0.2, "step": 20000, "test_accuracy": 91.58935546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.48506489396095276, "test_doc_norm": 1.383345127105713, "test_inbatch_neg_score": 0.6642871499061584, "test_inbatch_pos_score": 1.4889001846313477, "test_loss": 0.48506489396095276, "test_loss_align": 1.189328908920288, "test_loss_unif": 3.767944812774658, "test_loss_unif_q@queue": 3.767944812774658, "test_norm_diff": 0.10937898606061935, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3307132124900818, "test_query_norm": 1.4927239418029785, "test_queue_k_norm": 1.4204221963882446, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04001081734895706, "test_stdq": 0.04053826257586479, "test_stdqueue_k": 0.046863969415426254, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.5408, "dev_samples_per_second": 2.324, "dev_steps_per_second": 0.036, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.31768, "eval_beir-arguana_recall@10": 0.5505, "eval_beir-arguana_recall@100": 0.85704, "eval_beir-arguana_recall@20": 0.67994, "eval_beir-avg_ndcg@10": 0.320499, "eval_beir-avg_recall@10": 0.38500941666666666, "eval_beir-avg_recall@100": 0.5694265000000001, "eval_beir-avg_recall@20": 0.4457134166666667, "eval_beir-cqadupstack_ndcg@10": 0.21421999999999997, "eval_beir-cqadupstack_recall@10": 0.29737416666666666, "eval_beir-cqadupstack_recall@100": 0.5174249999999999, "eval_beir-cqadupstack_recall@20": 0.35892416666666666, "eval_beir-fiqa_ndcg@10": 0.1819, "eval_beir-fiqa_recall@10": 0.23936, "eval_beir-fiqa_recall@100": 0.48815, "eval_beir-fiqa_recall@20": 0.31164, "eval_beir-nfcorpus_ndcg@10": 0.2462, "eval_beir-nfcorpus_recall@10": 0.11983, "eval_beir-nfcorpus_recall@100": 0.2464, "eval_beir-nfcorpus_recall@20": 0.15589, "eval_beir-nq_ndcg@10": 0.21446, "eval_beir-nq_recall@10": 0.35607, "eval_beir-nq_recall@100": 0.69332, "eval_beir-nq_recall@20": 0.45964, "eval_beir-quora_ndcg@10": 0.69383, "eval_beir-quora_recall@10": 0.8187, "eval_beir-quora_recall@100": 0.9531, "eval_beir-quora_recall@20": 0.87403, "eval_beir-scidocs_ndcg@10": 0.12406, "eval_beir-scidocs_recall@10": 0.13073, "eval_beir-scidocs_recall@100": 0.32485, "eval_beir-scidocs_recall@20": 0.18457, "eval_beir-scifact_ndcg@10": 0.55929, "eval_beir-scifact_recall@10": 0.71017, "eval_beir-scifact_recall@100": 0.879, "eval_beir-scifact_recall@20": 0.78072, "eval_beir-trec-covid_ndcg@10": 0.45708, "eval_beir-trec-covid_recall@10": 0.488, "eval_beir-trec-covid_recall@100": 0.3382, "eval_beir-trec-covid_recall@20": 0.453, "eval_beir-webis-touche2020_ndcg@10": 0.19627, "eval_beir-webis-touche2020_recall@10": 0.13936, "eval_beir-webis-touche2020_recall@100": 0.39678, "eval_beir-webis-touche2020_recall@20": 0.19878, "eval_senteval-avg_sts": 0.7219990230229205, "eval_senteval-sickr_spearman": 0.6944407251641442, "eval_senteval-stsb_spearman": 0.7495573208816967, "step": 20000, "test_accuracy": 91.58935546875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.48506489396095276, "test_doc_norm": 1.383345127105713, "test_inbatch_neg_score": 0.6642871499061584, "test_inbatch_pos_score": 1.4889001846313477, "test_loss": 0.48506489396095276, "test_loss_align": 1.189328908920288, "test_loss_unif": 3.767944812774658, "test_loss_unif_q@queue": 3.767944812774658, "test_norm_diff": 0.10937898606061935, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.3307132124900818, "test_query_norm": 1.4927239418029785, "test_queue_k_norm": 1.4204221963882446, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04001081734895706, "test_stdq": 0.04053826257586479, "test_stdqueue_k": 0.046863969415426254, "test_stdqueue_q": 0.0 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5833, "doc_norm": 1.4202, "encoder_q-embeddings": 1590.9521, "encoder_q-layer.0": 1161.5155, "encoder_q-layer.1": 1400.4048, "encoder_q-layer.10": 355.553, "encoder_q-layer.11": 813.2335, "encoder_q-layer.2": 1495.5247, "encoder_q-layer.3": 1572.5034, "encoder_q-layer.4": 1491.713, "encoder_q-layer.5": 1397.5281, "encoder_q-layer.6": 1270.7852, "encoder_q-layer.7": 1055.7762, "encoder_q-layer.8": 789.0936, "encoder_q-layer.9": 375.8982, "epoch": 0.2, "inbatch_neg_score": 0.3066, "inbatch_pos_score": 0.937, "learning_rate": 4.438888888888889e-05, "loss": 3.5833, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 66.8359, "num_token_overlap": 15.8107, "num_token_query": 42.4318, "num_token_union": 68.5715, "num_word_context": 202.572, "num_word_doc": 49.8823, "num_word_query": 32.0651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1849.7367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3071, "query_norm": 1.4543, "queue_k_norm": 1.4217, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4318, "sent_len_1": 66.8359, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.055, "stdk": 0.0468, "stdq": 0.0447, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.6019, "doc_norm": 1.4178, "encoder_q-embeddings": 2036.3741, "encoder_q-layer.0": 1383.5925, "encoder_q-layer.1": 1637.8967, "encoder_q-layer.10": 404.0049, "encoder_q-layer.11": 931.0297, "encoder_q-layer.2": 1773.7614, "encoder_q-layer.3": 1851.6194, "encoder_q-layer.4": 2202.5535, "encoder_q-layer.5": 2257.0557, "encoder_q-layer.6": 1780.6504, "encoder_q-layer.7": 1613.7457, "encoder_q-layer.8": 1276.8378, "encoder_q-layer.9": 685.7148, "epoch": 0.2, "inbatch_neg_score": 0.3101, "inbatch_pos_score": 0.8999, "learning_rate": 4.433333333333334e-05, "loss": 3.6019, "norm_diff": 0.0165, "norm_loss": 0.0, "num_token_doc": 66.7243, "num_token_overlap": 15.8122, "num_token_query": 42.2745, "num_token_union": 68.4327, "num_word_context": 202.0342, "num_word_doc": 49.7777, "num_word_query": 31.9154, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2415.0975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3088, "query_norm": 1.4321, "queue_k_norm": 1.4211, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2745, "sent_len_1": 66.7243, "sent_len_max_0": 128.0, "sent_len_max_1": 191.865, "stdk": 0.0467, "stdq": 0.0438, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.5883, "doc_norm": 1.4207, "encoder_q-embeddings": 808.5865, "encoder_q-layer.0": 564.5675, "encoder_q-layer.1": 645.7311, "encoder_q-layer.10": 336.9767, "encoder_q-layer.11": 862.389, "encoder_q-layer.2": 694.4616, "encoder_q-layer.3": 765.7739, "encoder_q-layer.4": 762.7094, "encoder_q-layer.5": 646.4224, "encoder_q-layer.6": 690.7312, "encoder_q-layer.7": 701.3927, "encoder_q-layer.8": 622.2391, "encoder_q-layer.9": 406.6534, "epoch": 0.2, "inbatch_neg_score": 0.3152, "inbatch_pos_score": 0.9307, "learning_rate": 4.427777777777778e-05, "loss": 3.5883, "norm_diff": 0.0265, "norm_loss": 0.0, "num_token_doc": 66.9642, "num_token_overlap": 15.71, "num_token_query": 42.0709, "num_token_union": 68.5299, "num_word_context": 202.3514, "num_word_doc": 49.9446, "num_word_query": 31.7492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1014.0052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3137, "query_norm": 1.4439, "queue_k_norm": 1.4228, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.0709, "sent_len_1": 66.9642, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.9387, "stdk": 0.0469, "stdq": 0.044, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.5544, "doc_norm": 1.4231, "encoder_q-embeddings": 512.6042, "encoder_q-layer.0": 347.8314, "encoder_q-layer.1": 385.4198, "encoder_q-layer.10": 362.3925, "encoder_q-layer.11": 841.5034, "encoder_q-layer.2": 447.5233, "encoder_q-layer.3": 485.3835, "encoder_q-layer.4": 556.0548, "encoder_q-layer.5": 631.6113, "encoder_q-layer.6": 641.9337, "encoder_q-layer.7": 597.719, "encoder_q-layer.8": 514.9669, "encoder_q-layer.9": 358.0535, "epoch": 0.2, "inbatch_neg_score": 0.281, "inbatch_pos_score": 0.874, "learning_rate": 4.422222222222222e-05, "loss": 3.5544, "norm_diff": 0.0192, "norm_loss": 0.0, "num_token_doc": 66.9519, "num_token_overlap": 15.9056, "num_token_query": 42.4485, "num_token_union": 68.606, "num_word_context": 202.3823, "num_word_doc": 49.9861, "num_word_query": 32.0733, "postclip_grad_norm": 1.0, "preclip_grad_norm": 780.7878, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2812, "query_norm": 1.4349, "queue_k_norm": 1.4226, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4485, "sent_len_1": 66.9519, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.9613, "stdk": 0.0469, "stdq": 0.0442, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.5799, "doc_norm": 1.4211, "encoder_q-embeddings": 1227.2114, "encoder_q-layer.0": 884.282, "encoder_q-layer.1": 969.7026, "encoder_q-layer.10": 405.6187, "encoder_q-layer.11": 906.0212, "encoder_q-layer.2": 1042.5757, "encoder_q-layer.3": 1125.9738, "encoder_q-layer.4": 1150.6847, "encoder_q-layer.5": 1225.2401, "encoder_q-layer.6": 1169.7423, "encoder_q-layer.7": 1278.9946, "encoder_q-layer.8": 1084.9998, "encoder_q-layer.9": 654.7321, "epoch": 0.2, "inbatch_neg_score": 0.3386, "inbatch_pos_score": 0.9521, "learning_rate": 4.4166666666666665e-05, "loss": 3.5799, "norm_diff": 0.0385, "norm_loss": 0.0, "num_token_doc": 66.6608, "num_token_overlap": 15.7939, "num_token_query": 42.4124, "num_token_union": 68.4773, "num_word_context": 202.2522, "num_word_doc": 49.7422, "num_word_query": 32.0283, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1546.6029, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3384, "query_norm": 1.4596, "queue_k_norm": 1.4215, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4124, "sent_len_1": 66.6608, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6225, "stdk": 0.0469, "stdq": 0.0447, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 49.707, "active_queue_size": 16384.0, "cl_loss": 3.5692, "doc_norm": 1.4264, "encoder_q-embeddings": 1565.5272, "encoder_q-layer.0": 1116.8612, "encoder_q-layer.1": 1228.7087, "encoder_q-layer.10": 398.8412, "encoder_q-layer.11": 927.9033, "encoder_q-layer.2": 1283.1594, "encoder_q-layer.3": 1404.5411, "encoder_q-layer.4": 1430.7423, "encoder_q-layer.5": 1253.3418, "encoder_q-layer.6": 1278.7734, "encoder_q-layer.7": 1003.1733, "encoder_q-layer.8": 813.929, "encoder_q-layer.9": 440.5386, "epoch": 0.2, "inbatch_neg_score": 0.3261, "inbatch_pos_score": 0.9429, "learning_rate": 4.4111111111111114e-05, "loss": 3.5692, "norm_diff": 0.0334, "norm_loss": 0.0, "num_token_doc": 66.6488, "num_token_overlap": 15.8117, "num_token_query": 42.2124, "num_token_union": 68.3802, "num_word_context": 202.1344, "num_word_doc": 49.7485, "num_word_query": 31.8929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1726.9597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3247, "query_norm": 1.4598, "queue_k_norm": 1.4226, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2124, "sent_len_1": 66.6488, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0188, "stdk": 0.047, "stdq": 0.045, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.5454, "doc_norm": 1.4295, "encoder_q-embeddings": 6310.1372, "encoder_q-layer.0": 4740.769, "encoder_q-layer.1": 5029.2539, "encoder_q-layer.10": 772.6583, "encoder_q-layer.11": 1826.3605, "encoder_q-layer.2": 5369.5894, "encoder_q-layer.3": 6116.8691, "encoder_q-layer.4": 5563.2798, "encoder_q-layer.5": 5381.6143, "encoder_q-layer.6": 4230.1831, "encoder_q-layer.7": 3969.4609, "encoder_q-layer.8": 3076.0479, "encoder_q-layer.9": 1566.2784, "epoch": 0.2, "inbatch_neg_score": 0.3407, "inbatch_pos_score": 0.9639, "learning_rate": 4.4055555555555557e-05, "loss": 3.5454, "norm_diff": 0.0168, "norm_loss": 0.0, "num_token_doc": 66.876, "num_token_overlap": 15.8583, "num_token_query": 42.2522, "num_token_union": 68.4641, "num_word_context": 202.1977, "num_word_doc": 49.8889, "num_word_query": 31.91, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6900.953, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3396, "query_norm": 1.4433, "queue_k_norm": 1.4252, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2522, "sent_len_1": 66.876, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.6863, "stdk": 0.0472, "stdq": 0.044, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.5835, "doc_norm": 1.4226, "encoder_q-embeddings": 4458.7734, "encoder_q-layer.0": 3193.9592, "encoder_q-layer.1": 3950.9006, "encoder_q-layer.10": 708.3882, "encoder_q-layer.11": 1818.3113, "encoder_q-layer.2": 4254.1641, "encoder_q-layer.3": 4804.166, "encoder_q-layer.4": 5011.7109, "encoder_q-layer.5": 5432.3252, "encoder_q-layer.6": 5748.1587, "encoder_q-layer.7": 5109.4565, "encoder_q-layer.8": 4897.6221, "encoder_q-layer.9": 2073.7952, "epoch": 0.2, "inbatch_neg_score": 0.3276, "inbatch_pos_score": 0.9419, "learning_rate": 4.4000000000000006e-05, "loss": 3.5835, "norm_diff": 0.0236, "norm_loss": 0.0, "num_token_doc": 66.6485, "num_token_overlap": 15.8203, "num_token_query": 42.2692, "num_token_union": 68.3886, "num_word_context": 202.2124, "num_word_doc": 49.7193, "num_word_query": 31.9206, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6352.4064, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3269, "query_norm": 1.4463, "queue_k_norm": 1.4251, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2692, "sent_len_1": 66.6485, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.84, "stdk": 0.0469, "stdq": 0.0447, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.5591, "doc_norm": 1.4201, "encoder_q-embeddings": 1707.7876, "encoder_q-layer.0": 1251.2235, "encoder_q-layer.1": 1459.9684, "encoder_q-layer.10": 790.8908, "encoder_q-layer.11": 1815.7935, "encoder_q-layer.2": 1726.0111, "encoder_q-layer.3": 2009.8722, "encoder_q-layer.4": 2016.6757, "encoder_q-layer.5": 1945.1552, "encoder_q-layer.6": 1999.7815, "encoder_q-layer.7": 1726.436, "encoder_q-layer.8": 1454.7473, "encoder_q-layer.9": 925.1656, "epoch": 0.2, "inbatch_neg_score": 0.3184, "inbatch_pos_score": 0.9062, "learning_rate": 4.394444444444445e-05, "loss": 3.5591, "norm_diff": 0.0096, "norm_loss": 0.0, "num_token_doc": 67.0919, "num_token_overlap": 15.8805, "num_token_query": 42.263, "num_token_union": 68.5714, "num_word_context": 202.7763, "num_word_doc": 50.1162, "num_word_query": 31.9275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2441.4976, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3174, "query_norm": 1.4122, "queue_k_norm": 1.4265, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.263, "sent_len_1": 67.0919, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1825, "stdk": 0.0468, "stdq": 0.0435, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.5614, "doc_norm": 1.4305, "encoder_q-embeddings": 1558.7812, "encoder_q-layer.0": 1144.827, "encoder_q-layer.1": 1297.894, "encoder_q-layer.10": 779.4329, "encoder_q-layer.11": 1845.5554, "encoder_q-layer.2": 1461.7745, "encoder_q-layer.3": 1621.0427, "encoder_q-layer.4": 1487.9866, "encoder_q-layer.5": 1310.0332, "encoder_q-layer.6": 1318.1642, "encoder_q-layer.7": 1167.8193, "encoder_q-layer.8": 1071.4568, "encoder_q-layer.9": 805.8049, "epoch": 0.21, "inbatch_neg_score": 0.3282, "inbatch_pos_score": 0.9297, "learning_rate": 4.388888888888889e-05, "loss": 3.5614, "norm_diff": 0.0149, "norm_loss": 0.0, "num_token_doc": 66.7805, "num_token_overlap": 15.823, "num_token_query": 42.3173, "num_token_union": 68.53, "num_word_context": 202.3613, "num_word_doc": 49.841, "num_word_query": 31.9627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2017.6307, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3267, "query_norm": 1.425, "queue_k_norm": 1.4262, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3173, "sent_len_1": 66.7805, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2175, "stdk": 0.0472, "stdq": 0.0437, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.5929, "doc_norm": 1.4222, "encoder_q-embeddings": 3759.001, "encoder_q-layer.0": 2547.8545, "encoder_q-layer.1": 2785.782, "encoder_q-layer.10": 728.2385, "encoder_q-layer.11": 1759.5345, "encoder_q-layer.2": 3082.7727, "encoder_q-layer.3": 2878.8691, "encoder_q-layer.4": 2652.3208, "encoder_q-layer.5": 2356.8794, "encoder_q-layer.6": 2867.1162, "encoder_q-layer.7": 2750.7981, "encoder_q-layer.8": 2207.6592, "encoder_q-layer.9": 1122.0339, "epoch": 0.21, "inbatch_neg_score": 0.3363, "inbatch_pos_score": 0.9619, "learning_rate": 4.383333333333334e-05, "loss": 3.5929, "norm_diff": 0.0303, "norm_loss": 0.0, "num_token_doc": 66.6568, "num_token_overlap": 15.8141, "num_token_query": 42.2873, "num_token_union": 68.36, "num_word_context": 202.0233, "num_word_doc": 49.6893, "num_word_query": 31.9167, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3935.7981, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3354, "query_norm": 1.4525, "queue_k_norm": 1.4244, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2873, "sent_len_1": 66.6568, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2825, "stdk": 0.0469, "stdq": 0.0445, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.5783, "doc_norm": 1.4223, "encoder_q-embeddings": 1967.1481, "encoder_q-layer.0": 1335.4098, "encoder_q-layer.1": 1478.1102, "encoder_q-layer.10": 918.912, "encoder_q-layer.11": 1929.8276, "encoder_q-layer.2": 1683.062, "encoder_q-layer.3": 1826.6263, "encoder_q-layer.4": 2011.3278, "encoder_q-layer.5": 1809.4996, "encoder_q-layer.6": 1938.975, "encoder_q-layer.7": 1685.6051, "encoder_q-layer.8": 1475.5591, "encoder_q-layer.9": 899.7048, "epoch": 0.21, "inbatch_neg_score": 0.3354, "inbatch_pos_score": 0.9702, "learning_rate": 4.377777777777778e-05, "loss": 3.5783, "norm_diff": 0.0627, "norm_loss": 0.0, "num_token_doc": 66.7723, "num_token_overlap": 15.8509, "num_token_query": 42.4252, "num_token_union": 68.5612, "num_word_context": 202.4405, "num_word_doc": 49.8752, "num_word_query": 32.073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2472.7708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3364, "query_norm": 1.485, "queue_k_norm": 1.4255, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4252, "sent_len_1": 66.7723, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.505, "stdk": 0.0469, "stdq": 0.0459, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.5647, "doc_norm": 1.4235, "encoder_q-embeddings": 5998.5498, "encoder_q-layer.0": 4477.252, "encoder_q-layer.1": 5170.8755, "encoder_q-layer.10": 760.2349, "encoder_q-layer.11": 1799.0059, "encoder_q-layer.2": 5047.5356, "encoder_q-layer.3": 5342.1812, "encoder_q-layer.4": 5397.9102, "encoder_q-layer.5": 4724.9951, "encoder_q-layer.6": 4723.7402, "encoder_q-layer.7": 3994.5356, "encoder_q-layer.8": 3118.1892, "encoder_q-layer.9": 1281.8997, "epoch": 0.21, "inbatch_neg_score": 0.3275, "inbatch_pos_score": 0.9658, "learning_rate": 4.3722222222222224e-05, "loss": 3.5647, "norm_diff": 0.0476, "norm_loss": 0.0, "num_token_doc": 66.778, "num_token_overlap": 15.8786, "num_token_query": 42.2903, "num_token_union": 68.378, "num_word_context": 202.2394, "num_word_doc": 49.8149, "num_word_query": 31.9365, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6598.8104, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3254, "query_norm": 1.4711, "queue_k_norm": 1.425, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2903, "sent_len_1": 66.778, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7637, "stdk": 0.0469, "stdq": 0.0458, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.5782, "doc_norm": 1.4218, "encoder_q-embeddings": 2582.7971, "encoder_q-layer.0": 1944.162, "encoder_q-layer.1": 2253.5742, "encoder_q-layer.10": 720.5896, "encoder_q-layer.11": 1696.6417, "encoder_q-layer.2": 2340.0571, "encoder_q-layer.3": 2462.7793, "encoder_q-layer.4": 2520.0359, "encoder_q-layer.5": 2568.344, "encoder_q-layer.6": 2585.5598, "encoder_q-layer.7": 2012.1581, "encoder_q-layer.8": 1675.6028, "encoder_q-layer.9": 1028.0458, "epoch": 0.21, "inbatch_neg_score": 0.3063, "inbatch_pos_score": 0.9121, "learning_rate": 4.3666666666666666e-05, "loss": 3.5782, "norm_diff": 0.0151, "norm_loss": 0.0, "num_token_doc": 66.5514, "num_token_overlap": 15.8238, "num_token_query": 42.4347, "num_token_union": 68.3907, "num_word_context": 201.9326, "num_word_doc": 49.6949, "num_word_query": 32.0576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3184.3022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3054, "query_norm": 1.4253, "queue_k_norm": 1.4247, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4347, "sent_len_1": 66.5514, "sent_len_max_0": 128.0, "sent_len_max_1": 188.13, "stdk": 0.0469, "stdq": 0.0441, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.5942, "doc_norm": 1.4207, "encoder_q-embeddings": 3373.0557, "encoder_q-layer.0": 2488.957, "encoder_q-layer.1": 3069.5261, "encoder_q-layer.10": 774.4435, "encoder_q-layer.11": 1865.0886, "encoder_q-layer.2": 3723.5728, "encoder_q-layer.3": 3659.8738, "encoder_q-layer.4": 4056.572, "encoder_q-layer.5": 3585.1167, "encoder_q-layer.6": 3556.2881, "encoder_q-layer.7": 3557.8906, "encoder_q-layer.8": 2814.3518, "encoder_q-layer.9": 1171.8604, "epoch": 0.21, "inbatch_neg_score": 0.3149, "inbatch_pos_score": 0.9092, "learning_rate": 4.3611111111111116e-05, "loss": 3.5942, "norm_diff": 0.0118, "norm_loss": 0.0, "num_token_doc": 66.8259, "num_token_overlap": 15.7558, "num_token_query": 42.2055, "num_token_union": 68.5045, "num_word_context": 202.2893, "num_word_doc": 49.8855, "num_word_query": 31.8776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4535.3797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3145, "query_norm": 1.4256, "queue_k_norm": 1.4244, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2055, "sent_len_1": 66.8259, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0062, "stdk": 0.0469, "stdq": 0.0438, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.569, "doc_norm": 1.425, "encoder_q-embeddings": 3628.9084, "encoder_q-layer.0": 2466.7617, "encoder_q-layer.1": 2718.2957, "encoder_q-layer.10": 767.6279, "encoder_q-layer.11": 1754.0159, "encoder_q-layer.2": 3150.3723, "encoder_q-layer.3": 3060.824, "encoder_q-layer.4": 3039.2095, "encoder_q-layer.5": 3022.8979, "encoder_q-layer.6": 3070.6162, "encoder_q-layer.7": 2665.7737, "encoder_q-layer.8": 1591.8202, "encoder_q-layer.9": 878.7118, "epoch": 0.21, "inbatch_neg_score": 0.3063, "inbatch_pos_score": 0.9307, "learning_rate": 4.355555555555556e-05, "loss": 3.569, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.8798, "num_token_overlap": 15.8645, "num_token_query": 42.4849, "num_token_union": 68.6093, "num_word_context": 202.2254, "num_word_doc": 49.8878, "num_word_query": 32.0939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3926.5134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.4195, "queue_k_norm": 1.4243, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4849, "sent_len_1": 66.8798, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.655, "stdk": 0.0471, "stdq": 0.0442, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5572, "doc_norm": 1.4216, "encoder_q-embeddings": 9287.3389, "encoder_q-layer.0": 7203.8154, "encoder_q-layer.1": 7459.3926, "encoder_q-layer.10": 764.7832, "encoder_q-layer.11": 1805.7683, "encoder_q-layer.2": 8022.0322, "encoder_q-layer.3": 8774.5117, "encoder_q-layer.4": 7972.0405, "encoder_q-layer.5": 7872.9131, "encoder_q-layer.6": 9756.293, "encoder_q-layer.7": 9177.8408, "encoder_q-layer.8": 4493.5015, "encoder_q-layer.9": 1763.3718, "epoch": 0.21, "inbatch_neg_score": 0.3201, "inbatch_pos_score": 0.9126, "learning_rate": 4.35e-05, "loss": 3.5572, "norm_diff": 0.0149, "norm_loss": 0.0, "num_token_doc": 66.6979, "num_token_overlap": 15.8908, "num_token_query": 42.4864, "num_token_union": 68.46, "num_word_context": 202.3514, "num_word_doc": 49.7846, "num_word_query": 32.0993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10873.0144, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3203, "query_norm": 1.416, "queue_k_norm": 1.4231, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4864, "sent_len_1": 66.6979, "sent_len_max_0": 128.0, "sent_len_max_1": 188.155, "stdk": 0.0469, "stdq": 0.0436, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 49.4141, "active_queue_size": 16384.0, "cl_loss": 3.5903, "doc_norm": 1.4261, "encoder_q-embeddings": 9976.1299, "encoder_q-layer.0": 7014.314, "encoder_q-layer.1": 7477.0747, "encoder_q-layer.10": 917.7895, "encoder_q-layer.11": 1860.1074, "encoder_q-layer.2": 8664.4893, "encoder_q-layer.3": 8894.0537, "encoder_q-layer.4": 9747.624, "encoder_q-layer.5": 8418.1104, "encoder_q-layer.6": 7500.7207, "encoder_q-layer.7": 7860.1987, "encoder_q-layer.8": 6503.8594, "encoder_q-layer.9": 3007.676, "epoch": 0.21, "inbatch_neg_score": 0.3129, "inbatch_pos_score": 0.9429, "learning_rate": 4.344444444444445e-05, "loss": 3.5903, "norm_diff": 0.0327, "norm_loss": 0.0, "num_token_doc": 66.7711, "num_token_overlap": 15.8009, "num_token_query": 42.1017, "num_token_union": 68.4078, "num_word_context": 202.2496, "num_word_doc": 49.8242, "num_word_query": 31.8078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11131.6703, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3125, "query_norm": 1.4588, "queue_k_norm": 1.4253, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1017, "sent_len_1": 66.7711, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3063, "stdk": 0.0471, "stdq": 0.0454, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5928, "doc_norm": 1.4176, "encoder_q-embeddings": 2348.4014, "encoder_q-layer.0": 1625.8958, "encoder_q-layer.1": 1729.5203, "encoder_q-layer.10": 709.092, "encoder_q-layer.11": 1635.8134, "encoder_q-layer.2": 1861.7832, "encoder_q-layer.3": 2030.3789, "encoder_q-layer.4": 1993.8114, "encoder_q-layer.5": 1957.0333, "encoder_q-layer.6": 2042.171, "encoder_q-layer.7": 1584.5339, "encoder_q-layer.8": 1440.1636, "encoder_q-layer.9": 869.3599, "epoch": 0.21, "inbatch_neg_score": 0.3111, "inbatch_pos_score": 0.9355, "learning_rate": 4.338888888888889e-05, "loss": 3.5928, "norm_diff": 0.019, "norm_loss": 0.0, "num_token_doc": 66.9019, "num_token_overlap": 15.7834, "num_token_query": 42.1947, "num_token_union": 68.5182, "num_word_context": 202.8188, "num_word_doc": 49.9378, "num_word_query": 31.8721, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2626.4168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.311, "query_norm": 1.4355, "queue_k_norm": 1.4248, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1947, "sent_len_1": 66.9019, "sent_len_max_0": 128.0, "sent_len_max_1": 188.415, "stdk": 0.0468, "stdq": 0.0449, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.563, "doc_norm": 1.4272, "encoder_q-embeddings": 3924.3357, "encoder_q-layer.0": 2701.0117, "encoder_q-layer.1": 2836.01, "encoder_q-layer.10": 737.1766, "encoder_q-layer.11": 1758.2262, "encoder_q-layer.2": 2642.0972, "encoder_q-layer.3": 2746.7742, "encoder_q-layer.4": 2621.5833, "encoder_q-layer.5": 2300.3364, "encoder_q-layer.6": 2267.8113, "encoder_q-layer.7": 2052.3945, "encoder_q-layer.8": 1760.6428, "encoder_q-layer.9": 1113.606, "epoch": 0.21, "inbatch_neg_score": 0.3157, "inbatch_pos_score": 0.9287, "learning_rate": 4.3333333333333334e-05, "loss": 3.563, "norm_diff": 0.0297, "norm_loss": 0.0, "num_token_doc": 66.8979, "num_token_overlap": 15.933, "num_token_query": 42.3937, "num_token_union": 68.5364, "num_word_context": 202.6697, "num_word_doc": 49.9556, "num_word_query": 31.9956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3726.4221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3154, "query_norm": 1.4569, "queue_k_norm": 1.4245, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3937, "sent_len_1": 66.8979, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.9487, "stdk": 0.0472, "stdq": 0.0453, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5641, "doc_norm": 1.4305, "encoder_q-embeddings": 2853.4177, "encoder_q-layer.0": 1964.3944, "encoder_q-layer.1": 2359.2368, "encoder_q-layer.10": 760.5689, "encoder_q-layer.11": 1681.0431, "encoder_q-layer.2": 2492.5071, "encoder_q-layer.3": 2606.4495, "encoder_q-layer.4": 2370.593, "encoder_q-layer.5": 2013.8351, "encoder_q-layer.6": 2191.2451, "encoder_q-layer.7": 2076.7224, "encoder_q-layer.8": 1714.4015, "encoder_q-layer.9": 883.0466, "epoch": 0.22, "inbatch_neg_score": 0.2966, "inbatch_pos_score": 0.9277, "learning_rate": 4.3277777777777776e-05, "loss": 3.5641, "norm_diff": 0.0097, "norm_loss": 0.0, "num_token_doc": 66.7756, "num_token_overlap": 15.8139, "num_token_query": 42.3034, "num_token_union": 68.4425, "num_word_context": 201.8354, "num_word_doc": 49.7832, "num_word_query": 31.9467, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3156.4862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.4272, "queue_k_norm": 1.4248, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3034, "sent_len_1": 66.7756, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.6113, "stdk": 0.0473, "stdq": 0.0444, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.5416, "doc_norm": 1.4247, "encoder_q-embeddings": 1980.0704, "encoder_q-layer.0": 1384.8179, "encoder_q-layer.1": 1596.359, "encoder_q-layer.10": 757.0261, "encoder_q-layer.11": 1852.5326, "encoder_q-layer.2": 1791.0123, "encoder_q-layer.3": 1754.8983, "encoder_q-layer.4": 1728.3595, "encoder_q-layer.5": 1553.2152, "encoder_q-layer.6": 1624.3282, "encoder_q-layer.7": 1507.3361, "encoder_q-layer.8": 1310.8899, "encoder_q-layer.9": 810.3773, "epoch": 0.22, "inbatch_neg_score": 0.3057, "inbatch_pos_score": 0.9443, "learning_rate": 4.3222222222222226e-05, "loss": 3.5416, "norm_diff": 0.0383, "norm_loss": 0.0, "num_token_doc": 66.7592, "num_token_overlap": 15.9251, "num_token_query": 42.5621, "num_token_union": 68.5429, "num_word_context": 202.4671, "num_word_doc": 49.8309, "num_word_query": 32.1771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2349.4403, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3062, "query_norm": 1.463, "queue_k_norm": 1.4268, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5621, "sent_len_1": 66.7592, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5213, "stdk": 0.0472, "stdq": 0.0454, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.568, "doc_norm": 1.4278, "encoder_q-embeddings": 1958.8882, "encoder_q-layer.0": 1338.0436, "encoder_q-layer.1": 1455.3175, "encoder_q-layer.10": 992.5293, "encoder_q-layer.11": 2068.6956, "encoder_q-layer.2": 1646.4329, "encoder_q-layer.3": 1732.8666, "encoder_q-layer.4": 1840.9169, "encoder_q-layer.5": 1724.6978, "encoder_q-layer.6": 1760.1914, "encoder_q-layer.7": 1565.1133, "encoder_q-layer.8": 1316.8755, "encoder_q-layer.9": 874.7789, "epoch": 0.22, "inbatch_neg_score": 0.3129, "inbatch_pos_score": 0.9092, "learning_rate": 4.316666666666667e-05, "loss": 3.568, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.9059, "num_token_overlap": 15.7895, "num_token_query": 42.2189, "num_token_union": 68.4978, "num_word_context": 202.4279, "num_word_doc": 49.9, "num_word_query": 31.895, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2407.0595, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3125, "query_norm": 1.4215, "queue_k_norm": 1.4257, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2189, "sent_len_1": 66.9059, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2287, "stdk": 0.0473, "stdq": 0.0438, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.5851, "doc_norm": 1.4305, "encoder_q-embeddings": 1102.8768, "encoder_q-layer.0": 769.36, "encoder_q-layer.1": 817.5546, "encoder_q-layer.10": 765.8163, "encoder_q-layer.11": 1979.7911, "encoder_q-layer.2": 861.9375, "encoder_q-layer.3": 857.8926, "encoder_q-layer.4": 835.126, "encoder_q-layer.5": 847.6546, "encoder_q-layer.6": 882.611, "encoder_q-layer.7": 916.1126, "encoder_q-layer.8": 927.8949, "encoder_q-layer.9": 737.7395, "epoch": 0.22, "inbatch_neg_score": 0.3245, "inbatch_pos_score": 0.9541, "learning_rate": 4.311111111111111e-05, "loss": 3.5851, "norm_diff": 0.032, "norm_loss": 0.0, "num_token_doc": 66.816, "num_token_overlap": 15.8379, "num_token_query": 42.3537, "num_token_union": 68.5288, "num_word_context": 202.3775, "num_word_doc": 49.8629, "num_word_query": 31.9865, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1507.7754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3247, "query_norm": 1.4625, "queue_k_norm": 1.4252, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3537, "sent_len_1": 66.816, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.64, "stdk": 0.0474, "stdq": 0.045, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5533, "doc_norm": 1.4273, "encoder_q-embeddings": 2322.2041, "encoder_q-layer.0": 1703.9359, "encoder_q-layer.1": 1856.8497, "encoder_q-layer.10": 731.1077, "encoder_q-layer.11": 1732.9781, "encoder_q-layer.2": 2305.634, "encoder_q-layer.3": 2448.7126, "encoder_q-layer.4": 2409.876, "encoder_q-layer.5": 2066.2991, "encoder_q-layer.6": 2268.988, "encoder_q-layer.7": 2169.6836, "encoder_q-layer.8": 2016.8097, "encoder_q-layer.9": 1073.7782, "epoch": 0.22, "inbatch_neg_score": 0.3217, "inbatch_pos_score": 0.9692, "learning_rate": 4.305555555555556e-05, "loss": 3.5533, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 67.1421, "num_token_overlap": 15.856, "num_token_query": 42.32, "num_token_union": 68.6416, "num_word_context": 202.713, "num_word_doc": 50.0868, "num_word_query": 31.9421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2976.4605, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3228, "query_norm": 1.4757, "queue_k_norm": 1.4243, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.32, "sent_len_1": 67.1421, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9525, "stdk": 0.0473, "stdq": 0.0455, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.556, "doc_norm": 1.4216, "encoder_q-embeddings": 1767.5105, "encoder_q-layer.0": 1202.7812, "encoder_q-layer.1": 1311.7062, "encoder_q-layer.10": 727.7123, "encoder_q-layer.11": 1841.2921, "encoder_q-layer.2": 1461.3372, "encoder_q-layer.3": 1507.8461, "encoder_q-layer.4": 1479.6207, "encoder_q-layer.5": 1432.2545, "encoder_q-layer.6": 1471.3022, "encoder_q-layer.7": 1260.838, "encoder_q-layer.8": 1121.9633, "encoder_q-layer.9": 720.6494, "epoch": 0.22, "inbatch_neg_score": 0.3261, "inbatch_pos_score": 0.9448, "learning_rate": 4.3e-05, "loss": 3.556, "norm_diff": 0.0399, "norm_loss": 0.0, "num_token_doc": 66.8266, "num_token_overlap": 15.7866, "num_token_query": 42.1961, "num_token_union": 68.4208, "num_word_context": 202.2463, "num_word_doc": 49.8492, "num_word_query": 31.8558, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.7524, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3252, "query_norm": 1.4615, "queue_k_norm": 1.4259, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1961, "sent_len_1": 66.8266, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.6075, "stdk": 0.047, "stdq": 0.0451, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5651, "doc_norm": 1.425, "encoder_q-embeddings": 26221.9023, "encoder_q-layer.0": 20117.7812, "encoder_q-layer.1": 23684.8398, "encoder_q-layer.10": 1467.2623, "encoder_q-layer.11": 3627.2122, "encoder_q-layer.2": 29082.7969, "encoder_q-layer.3": 28331.4629, "encoder_q-layer.4": 31616.0391, "encoder_q-layer.5": 36422.5898, "encoder_q-layer.6": 28342.8652, "encoder_q-layer.7": 19897.9258, "encoder_q-layer.8": 7574.7681, "encoder_q-layer.9": 1531.2878, "epoch": 0.22, "inbatch_neg_score": 0.3125, "inbatch_pos_score": 0.9204, "learning_rate": 4.294444444444445e-05, "loss": 3.5651, "norm_diff": 0.0104, "norm_loss": 0.0, "num_token_doc": 66.663, "num_token_overlap": 15.7794, "num_token_query": 42.2228, "num_token_union": 68.4087, "num_word_context": 202.3776, "num_word_doc": 49.7927, "num_word_query": 31.9076, "postclip_grad_norm": 1.0, "preclip_grad_norm": 33889.025, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.312, "query_norm": 1.4199, "queue_k_norm": 1.4263, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2228, "sent_len_1": 66.663, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.95, "stdk": 0.0472, "stdq": 0.0434, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.5499, "doc_norm": 1.4176, "encoder_q-embeddings": 3757.6104, "encoder_q-layer.0": 2681.1802, "encoder_q-layer.1": 2729.0251, "encoder_q-layer.10": 732.5893, "encoder_q-layer.11": 1863.1266, "encoder_q-layer.2": 3240.7957, "encoder_q-layer.3": 3263.9016, "encoder_q-layer.4": 2937.4307, "encoder_q-layer.5": 2825.0347, "encoder_q-layer.6": 2733.5344, "encoder_q-layer.7": 2469.9988, "encoder_q-layer.8": 2661.6138, "encoder_q-layer.9": 1385.3939, "epoch": 0.22, "inbatch_neg_score": 0.3063, "inbatch_pos_score": 0.9058, "learning_rate": 4.2888888888888886e-05, "loss": 3.5499, "norm_diff": 0.0209, "norm_loss": 0.0, "num_token_doc": 66.9212, "num_token_overlap": 15.8126, "num_token_query": 42.2476, "num_token_union": 68.5746, "num_word_context": 202.4089, "num_word_doc": 49.9203, "num_word_query": 31.9049, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4143.8294, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3071, "query_norm": 1.4385, "queue_k_norm": 1.4246, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2476, "sent_len_1": 66.9212, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2113, "stdk": 0.0469, "stdq": 0.0445, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.5626, "doc_norm": 1.4304, "encoder_q-embeddings": 2164.0764, "encoder_q-layer.0": 1622.6777, "encoder_q-layer.1": 1560.0603, "encoder_q-layer.10": 692.7677, "encoder_q-layer.11": 1703.7083, "encoder_q-layer.2": 1828.3186, "encoder_q-layer.3": 1592.6388, "encoder_q-layer.4": 1439.5758, "encoder_q-layer.5": 1319.3086, "encoder_q-layer.6": 1321.8556, "encoder_q-layer.7": 1215.0522, "encoder_q-layer.8": 1106.9502, "encoder_q-layer.9": 716.0681, "epoch": 0.22, "inbatch_neg_score": 0.3087, "inbatch_pos_score": 0.9443, "learning_rate": 4.2833333333333335e-05, "loss": 3.5626, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.9002, "num_token_overlap": 15.8277, "num_token_query": 42.1478, "num_token_union": 68.4384, "num_word_context": 201.9943, "num_word_doc": 49.8878, "num_word_query": 31.8261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2272.3796, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3083, "query_norm": 1.446, "queue_k_norm": 1.4226, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1478, "sent_len_1": 66.9002, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2163, "stdk": 0.0474, "stdq": 0.0448, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.5444, "doc_norm": 1.4277, "encoder_q-embeddings": 3336.5083, "encoder_q-layer.0": 2355.3149, "encoder_q-layer.1": 2655.1135, "encoder_q-layer.10": 721.9989, "encoder_q-layer.11": 1686.7478, "encoder_q-layer.2": 3146.5901, "encoder_q-layer.3": 3666.2161, "encoder_q-layer.4": 3011.4998, "encoder_q-layer.5": 2716.2637, "encoder_q-layer.6": 2559.9656, "encoder_q-layer.7": 1848.032, "encoder_q-layer.8": 1673.4437, "encoder_q-layer.9": 964.0074, "epoch": 0.22, "inbatch_neg_score": 0.3263, "inbatch_pos_score": 0.9893, "learning_rate": 4.277777777777778e-05, "loss": 3.5444, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.6533, "num_token_overlap": 15.8679, "num_token_query": 42.3859, "num_token_union": 68.3801, "num_word_context": 201.7063, "num_word_doc": 49.7708, "num_word_query": 32.0351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3809.8991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3247, "query_norm": 1.4792, "queue_k_norm": 1.4219, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3859, "sent_len_1": 66.6533, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7575, "stdk": 0.0473, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.5536, "doc_norm": 1.4287, "encoder_q-embeddings": 1735.5134, "encoder_q-layer.0": 1136.3226, "encoder_q-layer.1": 1413.7499, "encoder_q-layer.10": 722.4073, "encoder_q-layer.11": 1811.2253, "encoder_q-layer.2": 1599.47, "encoder_q-layer.3": 1719.9672, "encoder_q-layer.4": 1829.3661, "encoder_q-layer.5": 1790.516, "encoder_q-layer.6": 1557.3994, "encoder_q-layer.7": 1392.4561, "encoder_q-layer.8": 1229.224, "encoder_q-layer.9": 726.2151, "epoch": 0.23, "inbatch_neg_score": 0.3093, "inbatch_pos_score": 0.9268, "learning_rate": 4.272222222222223e-05, "loss": 3.5536, "norm_diff": 0.018, "norm_loss": 0.0, "num_token_doc": 66.9947, "num_token_overlap": 15.8356, "num_token_query": 42.2813, "num_token_union": 68.6239, "num_word_context": 202.6493, "num_word_doc": 50.0117, "num_word_query": 31.9611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2240.6974, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3088, "query_norm": 1.4466, "queue_k_norm": 1.4248, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2813, "sent_len_1": 66.9947, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1337, "stdk": 0.0473, "stdq": 0.0448, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5589, "doc_norm": 1.4203, "encoder_q-embeddings": 1290.5276, "encoder_q-layer.0": 867.0476, "encoder_q-layer.1": 946.9611, "encoder_q-layer.10": 894.7755, "encoder_q-layer.11": 1923.2222, "encoder_q-layer.2": 1117.2251, "encoder_q-layer.3": 1067.208, "encoder_q-layer.4": 1123.7401, "encoder_q-layer.5": 1054.5295, "encoder_q-layer.6": 1104.7299, "encoder_q-layer.7": 1096.3162, "encoder_q-layer.8": 1108.1439, "encoder_q-layer.9": 797.8624, "epoch": 0.23, "inbatch_neg_score": 0.315, "inbatch_pos_score": 0.9336, "learning_rate": 4.266666666666667e-05, "loss": 3.5589, "norm_diff": 0.0582, "norm_loss": 0.0, "num_token_doc": 66.9927, "num_token_overlap": 15.8137, "num_token_query": 42.3295, "num_token_union": 68.6656, "num_word_context": 202.4399, "num_word_doc": 49.994, "num_word_query": 31.9939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1730.0664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3125, "query_norm": 1.4785, "queue_k_norm": 1.4225, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3295, "sent_len_1": 66.9927, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.71, "stdk": 0.047, "stdq": 0.0453, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.5406, "doc_norm": 1.4215, "encoder_q-embeddings": 3915.4612, "encoder_q-layer.0": 2745.2646, "encoder_q-layer.1": 2835.1699, "encoder_q-layer.10": 744.4531, "encoder_q-layer.11": 1768.1176, "encoder_q-layer.2": 3340.7317, "encoder_q-layer.3": 3667.2319, "encoder_q-layer.4": 4283.2549, "encoder_q-layer.5": 3744.199, "encoder_q-layer.6": 3802.2212, "encoder_q-layer.7": 4319.8862, "encoder_q-layer.8": 2782.9265, "encoder_q-layer.9": 921.7292, "epoch": 0.23, "inbatch_neg_score": 0.3016, "inbatch_pos_score": 0.9277, "learning_rate": 4.261111111111111e-05, "loss": 3.5406, "norm_diff": 0.0309, "norm_loss": 0.0, "num_token_doc": 66.7973, "num_token_overlap": 15.8664, "num_token_query": 42.4173, "num_token_union": 68.4968, "num_word_context": 202.3576, "num_word_doc": 49.8685, "num_word_query": 32.048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4832.9686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3, "query_norm": 1.4523, "queue_k_norm": 1.4215, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4173, "sent_len_1": 66.7973, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9275, "stdk": 0.0471, "stdq": 0.0448, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.5433, "doc_norm": 1.4222, "encoder_q-embeddings": 4509.2661, "encoder_q-layer.0": 3150.7239, "encoder_q-layer.1": 3171.4678, "encoder_q-layer.10": 743.5705, "encoder_q-layer.11": 1914.1462, "encoder_q-layer.2": 3268.9348, "encoder_q-layer.3": 3164.6028, "encoder_q-layer.4": 3120.0654, "encoder_q-layer.5": 3000.8447, "encoder_q-layer.6": 2785.8777, "encoder_q-layer.7": 1962.5299, "encoder_q-layer.8": 1305.3245, "encoder_q-layer.9": 808.86, "epoch": 0.23, "inbatch_neg_score": 0.302, "inbatch_pos_score": 0.9155, "learning_rate": 4.255555555555556e-05, "loss": 3.5433, "norm_diff": 0.0445, "norm_loss": 0.0, "num_token_doc": 66.9686, "num_token_overlap": 15.843, "num_token_query": 42.4618, "num_token_union": 68.683, "num_word_context": 202.5961, "num_word_doc": 50.0112, "num_word_query": 32.0668, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4288.689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3003, "query_norm": 1.4667, "queue_k_norm": 1.4216, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4618, "sent_len_1": 66.9686, "sent_len_max_0": 127.985, "sent_len_max_1": 189.3075, "stdk": 0.0471, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.5336, "doc_norm": 1.4178, "encoder_q-embeddings": 2311.7234, "encoder_q-layer.0": 1689.9817, "encoder_q-layer.1": 1706.125, "encoder_q-layer.10": 850.4222, "encoder_q-layer.11": 1812.0182, "encoder_q-layer.2": 1720.9999, "encoder_q-layer.3": 1884.9563, "encoder_q-layer.4": 1954.6091, "encoder_q-layer.5": 1925.6177, "encoder_q-layer.6": 1832.4199, "encoder_q-layer.7": 1684.0038, "encoder_q-layer.8": 1515.5756, "encoder_q-layer.9": 885.7546, "epoch": 0.23, "inbatch_neg_score": 0.3082, "inbatch_pos_score": 0.8999, "learning_rate": 4.25e-05, "loss": 3.5336, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.7772, "num_token_overlap": 15.8428, "num_token_query": 42.3273, "num_token_union": 68.4541, "num_word_context": 202.4675, "num_word_doc": 49.8271, "num_word_query": 31.9707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2639.4275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3076, "query_norm": 1.4291, "queue_k_norm": 1.4203, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3273, "sent_len_1": 66.7772, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0813, "stdk": 0.047, "stdq": 0.0441, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.5144, "doc_norm": 1.4239, "encoder_q-embeddings": 5497.7393, "encoder_q-layer.0": 3645.4792, "encoder_q-layer.1": 3667.3276, "encoder_q-layer.10": 839.4769, "encoder_q-layer.11": 1787.7959, "encoder_q-layer.2": 4140.4526, "encoder_q-layer.3": 4008.321, "encoder_q-layer.4": 3325.2363, "encoder_q-layer.5": 2480.1982, "encoder_q-layer.6": 2117.0256, "encoder_q-layer.7": 1619.5925, "encoder_q-layer.8": 1167.0325, "encoder_q-layer.9": 793.5436, "epoch": 0.23, "inbatch_neg_score": 0.2979, "inbatch_pos_score": 0.9268, "learning_rate": 4.2444444444444445e-05, "loss": 3.5144, "norm_diff": 0.0086, "norm_loss": 0.0, "num_token_doc": 66.7464, "num_token_overlap": 15.8329, "num_token_query": 42.2407, "num_token_union": 68.4154, "num_word_context": 201.9576, "num_word_doc": 49.7817, "num_word_query": 31.8944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4832.3713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.42, "queue_k_norm": 1.4206, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2407, "sent_len_1": 66.7464, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9425, "stdk": 0.0472, "stdq": 0.0439, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.5401, "doc_norm": 1.4186, "encoder_q-embeddings": 2774.8909, "encoder_q-layer.0": 2007.5676, "encoder_q-layer.1": 2459.3762, "encoder_q-layer.10": 409.9012, "encoder_q-layer.11": 938.4325, "encoder_q-layer.2": 3022.845, "encoder_q-layer.3": 2792.7681, "encoder_q-layer.4": 2864.9495, "encoder_q-layer.5": 2726.0728, "encoder_q-layer.6": 2252.3018, "encoder_q-layer.7": 1718.8765, "encoder_q-layer.8": 1116.1154, "encoder_q-layer.9": 505.8638, "epoch": 0.23, "inbatch_neg_score": 0.2939, "inbatch_pos_score": 0.8892, "learning_rate": 4.238888888888889e-05, "loss": 3.5401, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.8385, "num_token_overlap": 15.8388, "num_token_query": 42.4679, "num_token_union": 68.5742, "num_word_context": 202.534, "num_word_doc": 49.8779, "num_word_query": 32.0937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3279.4954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2939, "query_norm": 1.3918, "queue_k_norm": 1.421, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4679, "sent_len_1": 66.8385, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.59, "stdk": 0.047, "stdq": 0.0433, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.5294, "doc_norm": 1.4245, "encoder_q-embeddings": 424.0255, "encoder_q-layer.0": 279.1603, "encoder_q-layer.1": 291.6677, "encoder_q-layer.10": 360.2346, "encoder_q-layer.11": 930.0383, "encoder_q-layer.2": 338.0148, "encoder_q-layer.3": 357.5654, "encoder_q-layer.4": 351.5863, "encoder_q-layer.5": 318.7492, "encoder_q-layer.6": 343.4741, "encoder_q-layer.7": 339.0234, "encoder_q-layer.8": 403.0653, "encoder_q-layer.9": 337.4149, "epoch": 0.23, "inbatch_neg_score": 0.3001, "inbatch_pos_score": 0.9072, "learning_rate": 4.233333333333334e-05, "loss": 3.5294, "norm_diff": 0.0184, "norm_loss": 0.0, "num_token_doc": 66.8055, "num_token_overlap": 15.864, "num_token_query": 42.3321, "num_token_union": 68.4688, "num_word_context": 202.5354, "num_word_doc": 49.8508, "num_word_query": 31.994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 667.2254, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3005, "query_norm": 1.4206, "queue_k_norm": 1.4193, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3321, "sent_len_1": 66.8055, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.1238, "stdk": 0.0473, "stdq": 0.044, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.5375, "doc_norm": 1.4198, "encoder_q-embeddings": 810.7456, "encoder_q-layer.0": 592.7225, "encoder_q-layer.1": 637.5038, "encoder_q-layer.10": 385.367, "encoder_q-layer.11": 927.8147, "encoder_q-layer.2": 707.4471, "encoder_q-layer.3": 659.144, "encoder_q-layer.4": 637.5671, "encoder_q-layer.5": 587.8919, "encoder_q-layer.6": 580.2046, "encoder_q-layer.7": 563.302, "encoder_q-layer.8": 553.5256, "encoder_q-layer.9": 369.6459, "epoch": 0.23, "inbatch_neg_score": 0.2822, "inbatch_pos_score": 0.8716, "learning_rate": 4.227777777777778e-05, "loss": 3.5375, "norm_diff": 0.0107, "norm_loss": 0.0, "num_token_doc": 66.773, "num_token_overlap": 15.7974, "num_token_query": 42.1769, "num_token_union": 68.3828, "num_word_context": 201.8703, "num_word_doc": 49.7673, "num_word_query": 31.8534, "postclip_grad_norm": 1.0, "preclip_grad_norm": 961.6054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.281, "query_norm": 1.4191, "queue_k_norm": 1.4185, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1769, "sent_len_1": 66.773, "sent_len_max_0": 128.0, "sent_len_max_1": 190.645, "stdk": 0.0471, "stdq": 0.0443, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.5211, "doc_norm": 1.4206, "encoder_q-embeddings": 564.1559, "encoder_q-layer.0": 380.3267, "encoder_q-layer.1": 453.6556, "encoder_q-layer.10": 309.3547, "encoder_q-layer.11": 771.4752, "encoder_q-layer.2": 496.0181, "encoder_q-layer.3": 558.2604, "encoder_q-layer.4": 559.5201, "encoder_q-layer.5": 570.0541, "encoder_q-layer.6": 521.0296, "encoder_q-layer.7": 470.1226, "encoder_q-layer.8": 399.429, "encoder_q-layer.9": 301.3488, "epoch": 0.23, "inbatch_neg_score": 0.2844, "inbatch_pos_score": 0.8984, "learning_rate": 4.222222222222222e-05, "loss": 3.5211, "norm_diff": 0.0123, "norm_loss": 0.0, "num_token_doc": 66.7234, "num_token_overlap": 15.8484, "num_token_query": 42.4153, "num_token_union": 68.4811, "num_word_context": 202.2571, "num_word_doc": 49.7983, "num_word_query": 32.0609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 755.9105, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2817, "query_norm": 1.4275, "queue_k_norm": 1.4179, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4153, "sent_len_1": 66.7234, "sent_len_max_0": 127.99, "sent_len_max_1": 191.1712, "stdk": 0.0471, "stdq": 0.0446, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.5408, "doc_norm": 1.4105, "encoder_q-embeddings": 618.6715, "encoder_q-layer.0": 417.8802, "encoder_q-layer.1": 482.8528, "encoder_q-layer.10": 379.8028, "encoder_q-layer.11": 862.6256, "encoder_q-layer.2": 526.1521, "encoder_q-layer.3": 569.1696, "encoder_q-layer.4": 538.1213, "encoder_q-layer.5": 518.8437, "encoder_q-layer.6": 492.6127, "encoder_q-layer.7": 439.9807, "encoder_q-layer.8": 433.7272, "encoder_q-layer.9": 346.4371, "epoch": 0.24, "inbatch_neg_score": 0.2891, "inbatch_pos_score": 0.9033, "learning_rate": 4.216666666666667e-05, "loss": 3.5408, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.6974, "num_token_overlap": 15.7899, "num_token_query": 42.2706, "num_token_union": 68.4288, "num_word_context": 202.4429, "num_word_doc": 49.7604, "num_word_query": 31.9057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 784.6352, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2886, "query_norm": 1.4501, "queue_k_norm": 1.4185, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2706, "sent_len_1": 66.6974, "sent_len_max_0": 128.0, "sent_len_max_1": 188.86, "stdk": 0.0468, "stdq": 0.0453, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.523, "doc_norm": 1.4095, "encoder_q-embeddings": 944.0238, "encoder_q-layer.0": 706.2749, "encoder_q-layer.1": 826.399, "encoder_q-layer.10": 375.9094, "encoder_q-layer.11": 847.3136, "encoder_q-layer.2": 858.4628, "encoder_q-layer.3": 949.9136, "encoder_q-layer.4": 924.8572, "encoder_q-layer.5": 881.7258, "encoder_q-layer.6": 957.1541, "encoder_q-layer.7": 960.4061, "encoder_q-layer.8": 741.6403, "encoder_q-layer.9": 434.6812, "epoch": 0.24, "inbatch_neg_score": 0.2855, "inbatch_pos_score": 0.9106, "learning_rate": 4.211111111111111e-05, "loss": 3.523, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.7788, "num_token_overlap": 15.9094, "num_token_query": 42.584, "num_token_union": 68.5827, "num_word_context": 202.8306, "num_word_doc": 49.8766, "num_word_query": 32.1759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1229.9309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.4527, "queue_k_norm": 1.4175, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.584, "sent_len_1": 66.7788, "sent_len_max_0": 127.99, "sent_len_max_1": 188.5375, "stdk": 0.0468, "stdq": 0.0457, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.5424, "doc_norm": 1.4146, "encoder_q-embeddings": 1581.7815, "encoder_q-layer.0": 1229.0271, "encoder_q-layer.1": 1525.7362, "encoder_q-layer.10": 383.7467, "encoder_q-layer.11": 923.2982, "encoder_q-layer.2": 2258.9514, "encoder_q-layer.3": 2142.5496, "encoder_q-layer.4": 1770.693, "encoder_q-layer.5": 1542.6863, "encoder_q-layer.6": 1402.7897, "encoder_q-layer.7": 1858.0345, "encoder_q-layer.8": 1402.4646, "encoder_q-layer.9": 790.3934, "epoch": 0.24, "inbatch_neg_score": 0.2956, "inbatch_pos_score": 0.8989, "learning_rate": 4.205555555555556e-05, "loss": 3.5424, "norm_diff": 0.0167, "norm_loss": 0.0, "num_token_doc": 66.8084, "num_token_overlap": 15.7839, "num_token_query": 42.2908, "num_token_union": 68.5234, "num_word_context": 202.4146, "num_word_doc": 49.8568, "num_word_query": 31.9565, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2300.5427, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.4313, "queue_k_norm": 1.4184, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2908, "sent_len_1": 66.8084, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4238, "stdk": 0.047, "stdq": 0.0445, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.5296, "doc_norm": 1.4154, "encoder_q-embeddings": 775.2679, "encoder_q-layer.0": 565.0633, "encoder_q-layer.1": 651.6453, "encoder_q-layer.10": 338.7307, "encoder_q-layer.11": 846.2935, "encoder_q-layer.2": 762.5198, "encoder_q-layer.3": 713.0014, "encoder_q-layer.4": 699.2327, "encoder_q-layer.5": 595.1265, "encoder_q-layer.6": 506.8191, "encoder_q-layer.7": 455.037, "encoder_q-layer.8": 434.3651, "encoder_q-layer.9": 350.7844, "epoch": 0.24, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 0.9019, "learning_rate": 4.2e-05, "loss": 3.5296, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.7382, "num_token_overlap": 15.7589, "num_token_query": 42.1284, "num_token_union": 68.4168, "num_word_context": 202.0011, "num_word_doc": 49.783, "num_word_query": 31.7943, "postclip_grad_norm": 1.0, "preclip_grad_norm": 936.3594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2905, "query_norm": 1.4172, "queue_k_norm": 1.4174, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1284, "sent_len_1": 66.7382, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1662, "stdk": 0.047, "stdq": 0.044, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.5372, "doc_norm": 1.4076, "encoder_q-embeddings": 1010.7251, "encoder_q-layer.0": 745.0793, "encoder_q-layer.1": 800.2898, "encoder_q-layer.10": 394.5252, "encoder_q-layer.11": 921.1237, "encoder_q-layer.2": 958.1786, "encoder_q-layer.3": 1035.389, "encoder_q-layer.4": 932.0995, "encoder_q-layer.5": 818.0266, "encoder_q-layer.6": 708.451, "encoder_q-layer.7": 583.4425, "encoder_q-layer.8": 515.5844, "encoder_q-layer.9": 390.7227, "epoch": 0.24, "inbatch_neg_score": 0.2826, "inbatch_pos_score": 0.8916, "learning_rate": 4.194444444444445e-05, "loss": 3.5372, "norm_diff": 0.046, "norm_loss": 0.0, "num_token_doc": 66.8949, "num_token_overlap": 15.8841, "num_token_query": 42.2945, "num_token_union": 68.5076, "num_word_context": 202.054, "num_word_doc": 49.9171, "num_word_query": 31.9524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1191.369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2805, "query_norm": 1.4536, "queue_k_norm": 1.4157, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2945, "sent_len_1": 66.8949, "sent_len_max_0": 128.0, "sent_len_max_1": 188.79, "stdk": 0.0467, "stdq": 0.0459, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.5234, "doc_norm": 1.416, "encoder_q-embeddings": 2163.7646, "encoder_q-layer.0": 1580.566, "encoder_q-layer.1": 1463.415, "encoder_q-layer.10": 391.0819, "encoder_q-layer.11": 882.7302, "encoder_q-layer.2": 1561.1184, "encoder_q-layer.3": 1506.6431, "encoder_q-layer.4": 1484.0779, "encoder_q-layer.5": 1198.9419, "encoder_q-layer.6": 1227.1688, "encoder_q-layer.7": 1135.7449, "encoder_q-layer.8": 987.405, "encoder_q-layer.9": 605.2446, "epoch": 0.24, "inbatch_neg_score": 0.2837, "inbatch_pos_score": 0.8799, "learning_rate": 4.188888888888889e-05, "loss": 3.5234, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.8194, "num_token_overlap": 15.8489, "num_token_query": 42.3566, "num_token_union": 68.4629, "num_word_context": 202.2878, "num_word_doc": 49.8033, "num_word_query": 31.9811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.7929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.282, "query_norm": 1.436, "queue_k_norm": 1.4154, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3566, "sent_len_1": 66.8194, "sent_len_max_0": 128.0, "sent_len_max_1": 190.705, "stdk": 0.0471, "stdq": 0.045, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.5176, "doc_norm": 1.4124, "encoder_q-embeddings": 723.0079, "encoder_q-layer.0": 508.6112, "encoder_q-layer.1": 598.2256, "encoder_q-layer.10": 353.778, "encoder_q-layer.11": 883.3088, "encoder_q-layer.2": 639.3021, "encoder_q-layer.3": 678.5705, "encoder_q-layer.4": 628.2185, "encoder_q-layer.5": 509.4614, "encoder_q-layer.6": 479.2069, "encoder_q-layer.7": 457.1575, "encoder_q-layer.8": 433.6591, "encoder_q-layer.9": 350.1732, "epoch": 0.24, "inbatch_neg_score": 0.2965, "inbatch_pos_score": 0.9365, "learning_rate": 4.183333333333334e-05, "loss": 3.5176, "norm_diff": 0.0377, "norm_loss": 0.0, "num_token_doc": 66.7335, "num_token_overlap": 15.8551, "num_token_query": 42.4107, "num_token_union": 68.5059, "num_word_context": 202.3488, "num_word_doc": 49.8154, "num_word_query": 32.0633, "postclip_grad_norm": 1.0, "preclip_grad_norm": 883.7418, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.4501, "queue_k_norm": 1.4139, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4107, "sent_len_1": 66.7335, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5613, "stdk": 0.047, "stdq": 0.0451, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 49.9023, "active_queue_size": 16384.0, "cl_loss": 3.5267, "doc_norm": 1.4184, "encoder_q-embeddings": 503.5207, "encoder_q-layer.0": 354.4749, "encoder_q-layer.1": 389.0938, "encoder_q-layer.10": 361.8225, "encoder_q-layer.11": 855.4907, "encoder_q-layer.2": 424.6314, "encoder_q-layer.3": 439.8894, "encoder_q-layer.4": 446.7343, "encoder_q-layer.5": 426.098, "encoder_q-layer.6": 446.2008, "encoder_q-layer.7": 432.3596, "encoder_q-layer.8": 404.9433, "encoder_q-layer.9": 339.0898, "epoch": 0.24, "inbatch_neg_score": 0.2891, "inbatch_pos_score": 0.8994, "learning_rate": 4.177777777777778e-05, "loss": 3.5267, "norm_diff": 0.0163, "norm_loss": 0.0, "num_token_doc": 66.8609, "num_token_overlap": 15.8294, "num_token_query": 42.2238, "num_token_union": 68.4596, "num_word_context": 202.1727, "num_word_doc": 49.8457, "num_word_query": 31.8855, "postclip_grad_norm": 1.0, "preclip_grad_norm": 701.4763, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2881, "query_norm": 1.4267, "queue_k_norm": 1.4143, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2238, "sent_len_1": 66.8609, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.7163, "stdk": 0.0472, "stdq": 0.0446, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.5369, "doc_norm": 1.4072, "encoder_q-embeddings": 935.5237, "encoder_q-layer.0": 641.0816, "encoder_q-layer.1": 660.0831, "encoder_q-layer.10": 353.4335, "encoder_q-layer.11": 927.015, "encoder_q-layer.2": 789.6332, "encoder_q-layer.3": 879.6733, "encoder_q-layer.4": 863.0703, "encoder_q-layer.5": 745.9481, "encoder_q-layer.6": 734.6629, "encoder_q-layer.7": 586.5313, "encoder_q-layer.8": 465.3384, "encoder_q-layer.9": 345.5711, "epoch": 0.24, "inbatch_neg_score": 0.281, "inbatch_pos_score": 0.8848, "learning_rate": 4.172222222222222e-05, "loss": 3.5369, "norm_diff": 0.0106, "norm_loss": 0.0, "num_token_doc": 66.8532, "num_token_overlap": 15.7434, "num_token_query": 42.1838, "num_token_union": 68.5616, "num_word_context": 202.6035, "num_word_doc": 49.8643, "num_word_query": 31.8741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1104.2229, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2803, "query_norm": 1.4141, "queue_k_norm": 1.4112, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1838, "sent_len_1": 66.8532, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9487, "stdk": 0.0468, "stdq": 0.0444, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.5123, "doc_norm": 1.407, "encoder_q-embeddings": 873.2898, "encoder_q-layer.0": 614.594, "encoder_q-layer.1": 682.0983, "encoder_q-layer.10": 369.4571, "encoder_q-layer.11": 874.5702, "encoder_q-layer.2": 775.8237, "encoder_q-layer.3": 738.2112, "encoder_q-layer.4": 686.3924, "encoder_q-layer.5": 631.1836, "encoder_q-layer.6": 607.3119, "encoder_q-layer.7": 556.2333, "encoder_q-layer.8": 483.6981, "encoder_q-layer.9": 365.4651, "epoch": 0.24, "inbatch_neg_score": 0.2852, "inbatch_pos_score": 0.9082, "learning_rate": 4.166666666666667e-05, "loss": 3.5123, "norm_diff": 0.0356, "norm_loss": 0.0, "num_token_doc": 66.8282, "num_token_overlap": 15.8979, "num_token_query": 42.3906, "num_token_union": 68.4578, "num_word_context": 202.3447, "num_word_doc": 49.8588, "num_word_query": 32.0061, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1003.5601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2842, "query_norm": 1.4427, "queue_k_norm": 1.4113, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3906, "sent_len_1": 66.8282, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.4563, "stdk": 0.0468, "stdq": 0.0455, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.5046, "doc_norm": 1.4141, "encoder_q-embeddings": 595.4454, "encoder_q-layer.0": 399.9216, "encoder_q-layer.1": 446.8768, "encoder_q-layer.10": 346.3107, "encoder_q-layer.11": 893.6196, "encoder_q-layer.2": 544.1285, "encoder_q-layer.3": 595.1115, "encoder_q-layer.4": 591.5286, "encoder_q-layer.5": 516.6929, "encoder_q-layer.6": 546.3895, "encoder_q-layer.7": 533.6388, "encoder_q-layer.8": 482.6178, "encoder_q-layer.9": 364.8353, "epoch": 0.25, "inbatch_neg_score": 0.2637, "inbatch_pos_score": 0.8833, "learning_rate": 4.1611111111111114e-05, "loss": 3.5046, "norm_diff": 0.0338, "norm_loss": 0.0, "num_token_doc": 66.8889, "num_token_overlap": 15.9004, "num_token_query": 42.4049, "num_token_union": 68.5532, "num_word_context": 202.4722, "num_word_doc": 49.9084, "num_word_query": 31.9697, "postclip_grad_norm": 1.0, "preclip_grad_norm": 815.1468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2629, "query_norm": 1.3803, "queue_k_norm": 1.4117, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4049, "sent_len_1": 66.8889, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0475, "stdk": 0.0471, "stdq": 0.0439, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5153, "doc_norm": 1.4085, "encoder_q-embeddings": 2007.0917, "encoder_q-layer.0": 1372.2855, "encoder_q-layer.1": 1457.1324, "encoder_q-layer.10": 359.8555, "encoder_q-layer.11": 880.3874, "encoder_q-layer.2": 1860.8085, "encoder_q-layer.3": 2015.6039, "encoder_q-layer.4": 2411.3538, "encoder_q-layer.5": 2077.1213, "encoder_q-layer.6": 1953.4177, "encoder_q-layer.7": 1565.21, "encoder_q-layer.8": 1171.2087, "encoder_q-layer.9": 461.9216, "epoch": 0.25, "inbatch_neg_score": 0.2805, "inbatch_pos_score": 0.9019, "learning_rate": 4.155555555555556e-05, "loss": 3.5153, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.9137, "num_token_overlap": 15.8344, "num_token_query": 42.3734, "num_token_union": 68.6048, "num_word_context": 202.4123, "num_word_doc": 49.921, "num_word_query": 32.0043, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2422.6056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.28, "query_norm": 1.4219, "queue_k_norm": 1.4094, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3734, "sent_len_1": 66.9137, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.2275, "stdk": 0.047, "stdq": 0.0445, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 49.1211, "active_queue_size": 16384.0, "cl_loss": 3.5143, "doc_norm": 1.4054, "encoder_q-embeddings": 922.2901, "encoder_q-layer.0": 701.449, "encoder_q-layer.1": 730.0368, "encoder_q-layer.10": 381.3601, "encoder_q-layer.11": 972.8754, "encoder_q-layer.2": 867.2454, "encoder_q-layer.3": 942.1152, "encoder_q-layer.4": 924.585, "encoder_q-layer.5": 847.9012, "encoder_q-layer.6": 778.4319, "encoder_q-layer.7": 685.6788, "encoder_q-layer.8": 538.2916, "encoder_q-layer.9": 397.7112, "epoch": 0.25, "inbatch_neg_score": 0.2796, "inbatch_pos_score": 0.8735, "learning_rate": 4.15e-05, "loss": 3.5143, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.8816, "num_token_overlap": 15.7935, "num_token_query": 42.3148, "num_token_union": 68.5759, "num_word_context": 202.5123, "num_word_doc": 49.9172, "num_word_query": 31.9455, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1150.0421, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2781, "query_norm": 1.3934, "queue_k_norm": 1.4108, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3148, "sent_len_1": 66.8816, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5037, "stdk": 0.0468, "stdq": 0.0436, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.5221, "doc_norm": 1.4036, "encoder_q-embeddings": 721.9377, "encoder_q-layer.0": 530.2433, "encoder_q-layer.1": 571.088, "encoder_q-layer.10": 332.6889, "encoder_q-layer.11": 803.9194, "encoder_q-layer.2": 604.2869, "encoder_q-layer.3": 614.5715, "encoder_q-layer.4": 617.0102, "encoder_q-layer.5": 544.4011, "encoder_q-layer.6": 560.3868, "encoder_q-layer.7": 523.2128, "encoder_q-layer.8": 462.6124, "encoder_q-layer.9": 329.5496, "epoch": 0.25, "inbatch_neg_score": 0.2776, "inbatch_pos_score": 0.8823, "learning_rate": 4.144444444444445e-05, "loss": 3.5221, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.9336, "num_token_overlap": 15.8357, "num_token_query": 42.3683, "num_token_union": 68.5748, "num_word_context": 202.5418, "num_word_doc": 49.9262, "num_word_query": 31.9785, "postclip_grad_norm": 1.0, "preclip_grad_norm": 865.3058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2761, "query_norm": 1.4141, "queue_k_norm": 1.4114, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3683, "sent_len_1": 66.9336, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.2962, "stdk": 0.0468, "stdq": 0.0443, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.4994, "doc_norm": 1.4142, "encoder_q-embeddings": 889.2588, "encoder_q-layer.0": 582.0151, "encoder_q-layer.1": 674.202, "encoder_q-layer.10": 352.6041, "encoder_q-layer.11": 888.8787, "encoder_q-layer.2": 747.9534, "encoder_q-layer.3": 740.6217, "encoder_q-layer.4": 752.5285, "encoder_q-layer.5": 634.163, "encoder_q-layer.6": 592.2759, "encoder_q-layer.7": 636.665, "encoder_q-layer.8": 625.0667, "encoder_q-layer.9": 399.8715, "epoch": 0.25, "inbatch_neg_score": 0.2866, "inbatch_pos_score": 0.9248, "learning_rate": 4.138888888888889e-05, "loss": 3.4994, "norm_diff": 0.0354, "norm_loss": 0.0, "num_token_doc": 66.8425, "num_token_overlap": 15.8484, "num_token_query": 42.2149, "num_token_union": 68.4386, "num_word_context": 202.0198, "num_word_doc": 49.858, "num_word_query": 31.8883, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1047.758, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.4496, "queue_k_norm": 1.409, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2149, "sent_len_1": 66.8425, "sent_len_max_0": 128.0, "sent_len_max_1": 189.05, "stdk": 0.0472, "stdq": 0.0454, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.5164, "doc_norm": 1.4189, "encoder_q-embeddings": 574.5864, "encoder_q-layer.0": 377.7545, "encoder_q-layer.1": 441.893, "encoder_q-layer.10": 373.5566, "encoder_q-layer.11": 903.4904, "encoder_q-layer.2": 513.5632, "encoder_q-layer.3": 553.669, "encoder_q-layer.4": 563.8809, "encoder_q-layer.5": 495.9557, "encoder_q-layer.6": 459.5681, "encoder_q-layer.7": 435.7579, "encoder_q-layer.8": 411.8104, "encoder_q-layer.9": 336.5861, "epoch": 0.25, "inbatch_neg_score": 0.2917, "inbatch_pos_score": 0.9326, "learning_rate": 4.133333333333333e-05, "loss": 3.5164, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.6092, "num_token_overlap": 15.789, "num_token_query": 42.1596, "num_token_union": 68.3347, "num_word_context": 202.0941, "num_word_doc": 49.7369, "num_word_query": 31.8655, "postclip_grad_norm": 1.0, "preclip_grad_norm": 790.814, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.4363, "queue_k_norm": 1.4112, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1596, "sent_len_1": 66.6092, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6438, "stdk": 0.0474, "stdq": 0.0447, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.5087, "doc_norm": 1.4117, "encoder_q-embeddings": 2857.4966, "encoder_q-layer.0": 1896.7734, "encoder_q-layer.1": 1879.1315, "encoder_q-layer.10": 662.517, "encoder_q-layer.11": 1692.5215, "encoder_q-layer.2": 2142.5811, "encoder_q-layer.3": 2145.1777, "encoder_q-layer.4": 2256.3083, "encoder_q-layer.5": 2214.6104, "encoder_q-layer.6": 2164.9841, "encoder_q-layer.7": 1971.7769, "encoder_q-layer.8": 1558.9565, "encoder_q-layer.9": 897.1175, "epoch": 0.25, "inbatch_neg_score": 0.2764, "inbatch_pos_score": 0.918, "learning_rate": 4.127777777777778e-05, "loss": 3.5087, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.6257, "num_token_overlap": 15.8667, "num_token_query": 42.4313, "num_token_union": 68.3923, "num_word_context": 202.1275, "num_word_doc": 49.7143, "num_word_query": 32.0423, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2964.3901, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2769, "query_norm": 1.4408, "queue_k_norm": 1.4101, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4313, "sent_len_1": 66.6257, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.4913, "stdk": 0.0471, "stdq": 0.0455, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.5033, "doc_norm": 1.4153, "encoder_q-embeddings": 2004.3867, "encoder_q-layer.0": 1540.6689, "encoder_q-layer.1": 1650.4535, "encoder_q-layer.10": 721.3183, "encoder_q-layer.11": 1925.2849, "encoder_q-layer.2": 1937.1832, "encoder_q-layer.3": 2106.9465, "encoder_q-layer.4": 2233.5066, "encoder_q-layer.5": 2231.9771, "encoder_q-layer.6": 2156.7869, "encoder_q-layer.7": 1595.1443, "encoder_q-layer.8": 1206.4008, "encoder_q-layer.9": 858.481, "epoch": 0.25, "inbatch_neg_score": 0.2874, "inbatch_pos_score": 0.9033, "learning_rate": 4.1222222222222224e-05, "loss": 3.5033, "norm_diff": 0.0217, "norm_loss": 0.0, "num_token_doc": 66.7815, "num_token_overlap": 15.8394, "num_token_query": 42.3147, "num_token_union": 68.4298, "num_word_context": 202.4894, "num_word_doc": 49.8333, "num_word_query": 31.9416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2625.8833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.4351, "queue_k_norm": 1.4123, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3147, "sent_len_1": 66.7815, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4812, "stdk": 0.0472, "stdq": 0.0452, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.5027, "doc_norm": 1.4078, "encoder_q-embeddings": 2205.8008, "encoder_q-layer.0": 1745.5314, "encoder_q-layer.1": 1892.8157, "encoder_q-layer.10": 676.8259, "encoder_q-layer.11": 1624.5757, "encoder_q-layer.2": 2186.635, "encoder_q-layer.3": 2086.4287, "encoder_q-layer.4": 2129.1877, "encoder_q-layer.5": 2110.4512, "encoder_q-layer.6": 1949.7676, "encoder_q-layer.7": 2056.2593, "encoder_q-layer.8": 1887.7281, "encoder_q-layer.9": 1036.4456, "epoch": 0.25, "inbatch_neg_score": 0.2834, "inbatch_pos_score": 0.9224, "learning_rate": 4.116666666666667e-05, "loss": 3.5027, "norm_diff": 0.0351, "norm_loss": 0.0, "num_token_doc": 66.695, "num_token_overlap": 15.8514, "num_token_query": 42.496, "num_token_union": 68.5206, "num_word_context": 202.2094, "num_word_doc": 49.7635, "num_word_query": 32.1049, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2820.4266, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.4429, "queue_k_norm": 1.4113, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.496, "sent_len_1": 66.695, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.05, "stdk": 0.047, "stdq": 0.0453, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.5039, "doc_norm": 1.4166, "encoder_q-embeddings": 2428.5396, "encoder_q-layer.0": 1638.6434, "encoder_q-layer.1": 1809.1311, "encoder_q-layer.10": 783.0735, "encoder_q-layer.11": 1896.4181, "encoder_q-layer.2": 2089.7148, "encoder_q-layer.3": 2124.499, "encoder_q-layer.4": 1958.4442, "encoder_q-layer.5": 2196.8823, "encoder_q-layer.6": 2047.3568, "encoder_q-layer.7": 1440.6392, "encoder_q-layer.8": 1235.0671, "encoder_q-layer.9": 861.9449, "epoch": 0.25, "inbatch_neg_score": 0.2833, "inbatch_pos_score": 0.9316, "learning_rate": 4.111111111111111e-05, "loss": 3.5039, "norm_diff": 0.0436, "norm_loss": 0.0, "num_token_doc": 66.7636, "num_token_overlap": 15.8381, "num_token_query": 42.3421, "num_token_union": 68.4549, "num_word_context": 202.5537, "num_word_doc": 49.8124, "num_word_query": 31.9652, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2743.5665, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2844, "query_norm": 1.4602, "queue_k_norm": 1.4122, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3421, "sent_len_1": 66.7636, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1262, "stdk": 0.0474, "stdq": 0.0462, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.5172, "doc_norm": 1.4094, "encoder_q-embeddings": 1275.917, "encoder_q-layer.0": 893.1187, "encoder_q-layer.1": 1020.8595, "encoder_q-layer.10": 661.6027, "encoder_q-layer.11": 1624.7402, "encoder_q-layer.2": 1231.9808, "encoder_q-layer.3": 1337.6227, "encoder_q-layer.4": 1317.994, "encoder_q-layer.5": 1025.0975, "encoder_q-layer.6": 1020.0837, "encoder_q-layer.7": 1020.1613, "encoder_q-layer.8": 854.5742, "encoder_q-layer.9": 645.2787, "epoch": 0.25, "inbatch_neg_score": 0.2779, "inbatch_pos_score": 0.8916, "learning_rate": 4.105555555555556e-05, "loss": 3.5172, "norm_diff": 0.0222, "norm_loss": 0.0, "num_token_doc": 66.6242, "num_token_overlap": 15.8361, "num_token_query": 42.3803, "num_token_union": 68.4322, "num_word_context": 202.1867, "num_word_doc": 49.6855, "num_word_query": 32.0283, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1684.7724, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2771, "query_norm": 1.3973, "queue_k_norm": 1.4114, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3803, "sent_len_1": 66.6242, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.1987, "stdk": 0.047, "stdq": 0.044, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.4863, "doc_norm": 1.4106, "encoder_q-embeddings": 1706.8857, "encoder_q-layer.0": 1262.7179, "encoder_q-layer.1": 1397.2137, "encoder_q-layer.10": 743.3315, "encoder_q-layer.11": 1832.0997, "encoder_q-layer.2": 1412.5978, "encoder_q-layer.3": 1385.6135, "encoder_q-layer.4": 1470.6111, "encoder_q-layer.5": 1224.5764, "encoder_q-layer.6": 1452.6476, "encoder_q-layer.7": 1469.9866, "encoder_q-layer.8": 1491.8152, "encoder_q-layer.9": 943.3134, "epoch": 0.26, "inbatch_neg_score": 0.2865, "inbatch_pos_score": 0.9302, "learning_rate": 4.1e-05, "loss": 3.4863, "norm_diff": 0.0333, "norm_loss": 0.0, "num_token_doc": 66.6906, "num_token_overlap": 15.8506, "num_token_query": 42.3995, "num_token_union": 68.4159, "num_word_context": 202.1352, "num_word_doc": 49.7508, "num_word_query": 31.9995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2146.3797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2856, "query_norm": 1.4439, "queue_k_norm": 1.4123, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3995, "sent_len_1": 66.6906, "sent_len_max_0": 128.0, "sent_len_max_1": 190.76, "stdk": 0.0471, "stdq": 0.0458, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.4833, "doc_norm": 1.4123, "encoder_q-embeddings": 1238.1621, "encoder_q-layer.0": 880.2014, "encoder_q-layer.1": 957.7583, "encoder_q-layer.10": 738.955, "encoder_q-layer.11": 1721.2142, "encoder_q-layer.2": 1095.0662, "encoder_q-layer.3": 1075.4099, "encoder_q-layer.4": 1111.0007, "encoder_q-layer.5": 1072.7828, "encoder_q-layer.6": 1098.3011, "encoder_q-layer.7": 1202.0576, "encoder_q-layer.8": 1211.7642, "encoder_q-layer.9": 843.4171, "epoch": 0.26, "inbatch_neg_score": 0.2887, "inbatch_pos_score": 0.895, "learning_rate": 4.094444444444445e-05, "loss": 3.4833, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 67.1952, "num_token_overlap": 15.9309, "num_token_query": 42.5896, "num_token_union": 68.7838, "num_word_context": 202.7373, "num_word_doc": 50.1459, "num_word_query": 32.1819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1706.9697, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.4072, "queue_k_norm": 1.4124, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5896, "sent_len_1": 67.1952, "sent_len_max_0": 127.995, "sent_len_max_1": 189.6488, "stdk": 0.0471, "stdq": 0.044, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.4935, "doc_norm": 1.4084, "encoder_q-embeddings": 1248.5558, "encoder_q-layer.0": 858.8257, "encoder_q-layer.1": 962.7263, "encoder_q-layer.10": 712.3659, "encoder_q-layer.11": 1757.319, "encoder_q-layer.2": 1185.4899, "encoder_q-layer.3": 1280.0055, "encoder_q-layer.4": 1212.9585, "encoder_q-layer.5": 1252.5547, "encoder_q-layer.6": 1218.6022, "encoder_q-layer.7": 952.1763, "encoder_q-layer.8": 861.707, "encoder_q-layer.9": 666.6838, "epoch": 0.26, "inbatch_neg_score": 0.284, "inbatch_pos_score": 0.9087, "learning_rate": 4.088888888888889e-05, "loss": 3.4935, "norm_diff": 0.011, "norm_loss": 0.0, "num_token_doc": 66.7781, "num_token_overlap": 15.7887, "num_token_query": 42.1571, "num_token_union": 68.4091, "num_word_context": 202.3544, "num_word_doc": 49.8714, "num_word_query": 31.844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1701.8497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2832, "query_norm": 1.4073, "queue_k_norm": 1.4126, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1571, "sent_len_1": 66.7781, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4263, "stdk": 0.047, "stdq": 0.0444, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4932, "doc_norm": 1.4131, "encoder_q-embeddings": 4394.6572, "encoder_q-layer.0": 3167.6545, "encoder_q-layer.1": 3654.0669, "encoder_q-layer.10": 788.7315, "encoder_q-layer.11": 1831.3527, "encoder_q-layer.2": 4532.2827, "encoder_q-layer.3": 4405.71, "encoder_q-layer.4": 3945.9929, "encoder_q-layer.5": 3300.7744, "encoder_q-layer.6": 2439.8713, "encoder_q-layer.7": 2003.7867, "encoder_q-layer.8": 1576.0459, "encoder_q-layer.9": 862.517, "epoch": 0.26, "inbatch_neg_score": 0.2762, "inbatch_pos_score": 0.9258, "learning_rate": 4.0833333333333334e-05, "loss": 3.4932, "norm_diff": 0.0241, "norm_loss": 0.0, "num_token_doc": 66.6538, "num_token_overlap": 15.7814, "num_token_query": 42.1874, "num_token_union": 68.413, "num_word_context": 202.0292, "num_word_doc": 49.7692, "num_word_query": 31.8594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4710.6143, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2766, "query_norm": 1.4359, "queue_k_norm": 1.4119, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1874, "sent_len_1": 66.6538, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7512, "stdk": 0.0472, "stdq": 0.0456, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 50.5859, "active_queue_size": 16384.0, "cl_loss": 3.5001, "doc_norm": 1.4097, "encoder_q-embeddings": 4746.3403, "encoder_q-layer.0": 3331.8442, "encoder_q-layer.1": 3360.5754, "encoder_q-layer.10": 690.7958, "encoder_q-layer.11": 1699.0909, "encoder_q-layer.2": 3463.1526, "encoder_q-layer.3": 3465.0601, "encoder_q-layer.4": 3972.728, "encoder_q-layer.5": 3381.0842, "encoder_q-layer.6": 3765.0833, "encoder_q-layer.7": 3291.5466, "encoder_q-layer.8": 2335.3203, "encoder_q-layer.9": 1139.8844, "epoch": 0.26, "inbatch_neg_score": 0.2797, "inbatch_pos_score": 0.8955, "learning_rate": 4.0777777777777783e-05, "loss": 3.5001, "norm_diff": 0.0154, "norm_loss": 0.0, "num_token_doc": 66.6685, "num_token_overlap": 15.8312, "num_token_query": 42.3031, "num_token_union": 68.447, "num_word_context": 202.0252, "num_word_doc": 49.779, "num_word_query": 31.9729, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4919.9797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2798, "query_norm": 1.3975, "queue_k_norm": 1.413, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3031, "sent_len_1": 66.6685, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7688, "stdk": 0.0471, "stdq": 0.0441, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.4925, "doc_norm": 1.41, "encoder_q-embeddings": 2307.8833, "encoder_q-layer.0": 1606.7426, "encoder_q-layer.1": 1946.0052, "encoder_q-layer.10": 791.5869, "encoder_q-layer.11": 1906.681, "encoder_q-layer.2": 2321.6912, "encoder_q-layer.3": 2605.6973, "encoder_q-layer.4": 2382.6497, "encoder_q-layer.5": 1744.4165, "encoder_q-layer.6": 1515.3907, "encoder_q-layer.7": 1470.2483, "encoder_q-layer.8": 1474.7146, "encoder_q-layer.9": 1033.2415, "epoch": 0.26, "inbatch_neg_score": 0.2911, "inbatch_pos_score": 0.9209, "learning_rate": 4.0722222222222226e-05, "loss": 3.4925, "norm_diff": 0.0577, "norm_loss": 0.0, "num_token_doc": 66.5194, "num_token_overlap": 15.8615, "num_token_query": 42.4609, "num_token_union": 68.3513, "num_word_context": 202.2406, "num_word_doc": 49.646, "num_word_query": 32.0665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2792.8487, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2888, "query_norm": 1.4678, "queue_k_norm": 1.4109, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4609, "sent_len_1": 66.5194, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.96, "stdk": 0.0471, "stdq": 0.046, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.5044, "doc_norm": 1.4201, "encoder_q-embeddings": 3747.5125, "encoder_q-layer.0": 2674.9646, "encoder_q-layer.1": 2762.292, "encoder_q-layer.10": 749.7361, "encoder_q-layer.11": 1870.6683, "encoder_q-layer.2": 2836.2249, "encoder_q-layer.3": 3019.7236, "encoder_q-layer.4": 2883.5593, "encoder_q-layer.5": 2771.4155, "encoder_q-layer.6": 2473.0601, "encoder_q-layer.7": 2311.7432, "encoder_q-layer.8": 2072.1941, "encoder_q-layer.9": 1243.6312, "epoch": 0.26, "inbatch_neg_score": 0.2925, "inbatch_pos_score": 0.9326, "learning_rate": 4.066666666666667e-05, "loss": 3.5044, "norm_diff": 0.0297, "norm_loss": 0.0, "num_token_doc": 66.9412, "num_token_overlap": 15.8366, "num_token_query": 42.2621, "num_token_union": 68.5357, "num_word_context": 202.0495, "num_word_doc": 49.9665, "num_word_query": 31.9072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3916.7286, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2932, "query_norm": 1.4498, "queue_k_norm": 1.4111, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2621, "sent_len_1": 66.9412, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7138, "stdk": 0.0475, "stdq": 0.0451, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.4748, "doc_norm": 1.4135, "encoder_q-embeddings": 1262.2731, "encoder_q-layer.0": 878.1632, "encoder_q-layer.1": 971.6093, "encoder_q-layer.10": 656.9567, "encoder_q-layer.11": 1700.7853, "encoder_q-layer.2": 1059.5662, "encoder_q-layer.3": 1201.3391, "encoder_q-layer.4": 1222.7455, "encoder_q-layer.5": 1136.0295, "encoder_q-layer.6": 1035.4094, "encoder_q-layer.7": 908.8004, "encoder_q-layer.8": 936.8101, "encoder_q-layer.9": 691.6132, "epoch": 0.26, "inbatch_neg_score": 0.2788, "inbatch_pos_score": 0.9004, "learning_rate": 4.061111111111111e-05, "loss": 3.4748, "norm_diff": 0.0132, "norm_loss": 0.0, "num_token_doc": 66.9146, "num_token_overlap": 15.8951, "num_token_query": 42.483, "num_token_union": 68.5962, "num_word_context": 202.7556, "num_word_doc": 49.8746, "num_word_query": 32.0779, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1653.7566, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2776, "query_norm": 1.4034, "queue_k_norm": 1.4137, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.483, "sent_len_1": 66.9146, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6025, "stdk": 0.0472, "stdq": 0.0441, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.4858, "doc_norm": 1.424, "encoder_q-embeddings": 14294.4775, "encoder_q-layer.0": 10737.6064, "encoder_q-layer.1": 11032.8408, "encoder_q-layer.10": 743.7115, "encoder_q-layer.11": 1835.0623, "encoder_q-layer.2": 12456.4893, "encoder_q-layer.3": 13683.9121, "encoder_q-layer.4": 13801.7256, "encoder_q-layer.5": 9677.5039, "encoder_q-layer.6": 8776.9209, "encoder_q-layer.7": 5514.7803, "encoder_q-layer.8": 4182.2285, "encoder_q-layer.9": 1881.3025, "epoch": 0.26, "inbatch_neg_score": 0.2692, "inbatch_pos_score": 0.8662, "learning_rate": 4.055555555555556e-05, "loss": 3.4858, "norm_diff": 0.0257, "norm_loss": 0.0, "num_token_doc": 66.9126, "num_token_overlap": 15.8363, "num_token_query": 42.3953, "num_token_union": 68.6274, "num_word_context": 202.347, "num_word_doc": 49.9582, "num_word_query": 31.9964, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14877.3026, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2695, "query_norm": 1.3983, "queue_k_norm": 1.4132, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3953, "sent_len_1": 66.9126, "sent_len_max_0": 128.0, "sent_len_max_1": 188.86, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4608, "doc_norm": 1.422, "encoder_q-embeddings": 1480.7197, "encoder_q-layer.0": 1061.2939, "encoder_q-layer.1": 1144.3615, "encoder_q-layer.10": 634.8865, "encoder_q-layer.11": 1703.5238, "encoder_q-layer.2": 1249.4227, "encoder_q-layer.3": 1260.6704, "encoder_q-layer.4": 1271.2994, "encoder_q-layer.5": 1165.1752, "encoder_q-layer.6": 1249.1614, "encoder_q-layer.7": 1058.8734, "encoder_q-layer.8": 946.7964, "encoder_q-layer.9": 661.5414, "epoch": 0.26, "inbatch_neg_score": 0.2694, "inbatch_pos_score": 0.9111, "learning_rate": 4.05e-05, "loss": 3.4608, "norm_diff": 0.012, "norm_loss": 0.0, "num_token_doc": 66.7823, "num_token_overlap": 15.8947, "num_token_query": 42.5291, "num_token_union": 68.5729, "num_word_context": 202.4094, "num_word_doc": 49.8289, "num_word_query": 32.1657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1803.2576, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.269, "query_norm": 1.4227, "queue_k_norm": 1.4151, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5291, "sent_len_1": 66.7823, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.8288, "stdk": 0.0476, "stdq": 0.0448, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.4935, "doc_norm": 1.4083, "encoder_q-embeddings": 820.6779, "encoder_q-layer.0": 574.0383, "encoder_q-layer.1": 583.3802, "encoder_q-layer.10": 705.4446, "encoder_q-layer.11": 2058.314, "encoder_q-layer.2": 547.9635, "encoder_q-layer.3": 581.843, "encoder_q-layer.4": 659.8489, "encoder_q-layer.5": 669.2895, "encoder_q-layer.6": 643.8926, "encoder_q-layer.7": 685.0333, "encoder_q-layer.8": 769.4647, "encoder_q-layer.9": 654.8668, "epoch": 0.27, "inbatch_neg_score": 0.2709, "inbatch_pos_score": 0.8789, "learning_rate": 4.0444444444444444e-05, "loss": 3.4935, "norm_diff": 0.0152, "norm_loss": 0.0, "num_token_doc": 66.4515, "num_token_overlap": 15.8171, "num_token_query": 42.3261, "num_token_union": 68.3076, "num_word_context": 202.2025, "num_word_doc": 49.5755, "num_word_query": 31.9399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1370.9867, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2695, "query_norm": 1.393, "queue_k_norm": 1.4125, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3261, "sent_len_1": 66.4515, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.6425, "stdk": 0.047, "stdq": 0.0438, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.5016, "doc_norm": 1.409, "encoder_q-embeddings": 1081.8135, "encoder_q-layer.0": 746.088, "encoder_q-layer.1": 788.2211, "encoder_q-layer.10": 623.5524, "encoder_q-layer.11": 1653.6519, "encoder_q-layer.2": 897.4677, "encoder_q-layer.3": 909.4258, "encoder_q-layer.4": 996.5967, "encoder_q-layer.5": 902.7154, "encoder_q-layer.6": 887.1736, "encoder_q-layer.7": 845.4028, "encoder_q-layer.8": 814.5596, "encoder_q-layer.9": 648.2905, "epoch": 0.27, "inbatch_neg_score": 0.2782, "inbatch_pos_score": 0.8867, "learning_rate": 4.038888888888889e-05, "loss": 3.5016, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.7718, "num_token_overlap": 15.7883, "num_token_query": 42.3269, "num_token_union": 68.5256, "num_word_context": 202.2528, "num_word_doc": 49.8335, "num_word_query": 31.9726, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1444.4333, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2773, "query_norm": 1.3955, "queue_k_norm": 1.415, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3269, "sent_len_1": 66.7718, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6887, "stdk": 0.0471, "stdq": 0.0439, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.497, "doc_norm": 1.4082, "encoder_q-embeddings": 1679.9139, "encoder_q-layer.0": 1127.2354, "encoder_q-layer.1": 1335.701, "encoder_q-layer.10": 712.9648, "encoder_q-layer.11": 1833.5164, "encoder_q-layer.2": 1370.3324, "encoder_q-layer.3": 1434.1514, "encoder_q-layer.4": 1422.9581, "encoder_q-layer.5": 1443.0334, "encoder_q-layer.6": 1307.5327, "encoder_q-layer.7": 1032.1847, "encoder_q-layer.8": 995.8982, "encoder_q-layer.9": 769.5583, "epoch": 0.27, "inbatch_neg_score": 0.2754, "inbatch_pos_score": 0.9023, "learning_rate": 4.0333333333333336e-05, "loss": 3.497, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.5692, "num_token_overlap": 15.7796, "num_token_query": 42.4094, "num_token_union": 68.4445, "num_word_context": 202.3505, "num_word_doc": 49.6971, "num_word_query": 32.0587, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1983.2922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2776, "query_norm": 1.4047, "queue_k_norm": 1.4113, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4094, "sent_len_1": 66.5692, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.89, "stdk": 0.0471, "stdq": 0.0443, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.4581, "doc_norm": 1.411, "encoder_q-embeddings": 1107.6809, "encoder_q-layer.0": 722.0313, "encoder_q-layer.1": 771.6309, "encoder_q-layer.10": 748.5095, "encoder_q-layer.11": 1747.2922, "encoder_q-layer.2": 888.4453, "encoder_q-layer.3": 909.4466, "encoder_q-layer.4": 869.4044, "encoder_q-layer.5": 883.8183, "encoder_q-layer.6": 926.7214, "encoder_q-layer.7": 873.1647, "encoder_q-layer.8": 1042.8993, "encoder_q-layer.9": 785.9256, "epoch": 0.27, "inbatch_neg_score": 0.2811, "inbatch_pos_score": 0.9263, "learning_rate": 4.027777777777778e-05, "loss": 3.4581, "norm_diff": 0.0149, "norm_loss": 0.0, "num_token_doc": 66.9948, "num_token_overlap": 15.8451, "num_token_query": 42.4338, "num_token_union": 68.6759, "num_word_context": 203.039, "num_word_doc": 50.0312, "num_word_query": 32.0429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1516.856, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2808, "query_norm": 1.4237, "queue_k_norm": 1.4149, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4338, "sent_len_1": 66.9948, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.8787, "stdk": 0.0472, "stdq": 0.0448, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.4775, "doc_norm": 1.4167, "encoder_q-embeddings": 1431.1479, "encoder_q-layer.0": 1020.8222, "encoder_q-layer.1": 1181.5959, "encoder_q-layer.10": 669.7685, "encoder_q-layer.11": 1781.6654, "encoder_q-layer.2": 1366.9042, "encoder_q-layer.3": 1436.5166, "encoder_q-layer.4": 1570.2819, "encoder_q-layer.5": 1456.3103, "encoder_q-layer.6": 1404.3878, "encoder_q-layer.7": 1431.106, "encoder_q-layer.8": 1482.7872, "encoder_q-layer.9": 896.7533, "epoch": 0.27, "inbatch_neg_score": 0.2768, "inbatch_pos_score": 0.877, "learning_rate": 4.022222222222222e-05, "loss": 3.4775, "norm_diff": 0.0312, "norm_loss": 0.0, "num_token_doc": 66.4741, "num_token_overlap": 15.7995, "num_token_query": 42.299, "num_token_union": 68.3544, "num_word_context": 202.0298, "num_word_doc": 49.6107, "num_word_query": 31.9529, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2043.6683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2776, "query_norm": 1.3855, "queue_k_norm": 1.4132, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.299, "sent_len_1": 66.4741, "sent_len_max_0": 127.995, "sent_len_max_1": 187.8013, "stdk": 0.0474, "stdq": 0.0436, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.4626, "doc_norm": 1.4132, "encoder_q-embeddings": 3317.9233, "encoder_q-layer.0": 2357.5366, "encoder_q-layer.1": 2695.0305, "encoder_q-layer.10": 1435.3561, "encoder_q-layer.11": 3602.1416, "encoder_q-layer.2": 3076.9771, "encoder_q-layer.3": 3403.8584, "encoder_q-layer.4": 3301.2168, "encoder_q-layer.5": 2782.9124, "encoder_q-layer.6": 2532.9297, "encoder_q-layer.7": 2367.1257, "encoder_q-layer.8": 2122.2192, "encoder_q-layer.9": 1451.0496, "epoch": 0.27, "inbatch_neg_score": 0.2895, "inbatch_pos_score": 0.8833, "learning_rate": 4.016666666666667e-05, "loss": 3.4626, "norm_diff": 0.0145, "norm_loss": 0.0, "num_token_doc": 66.8288, "num_token_overlap": 15.7991, "num_token_query": 42.2381, "num_token_union": 68.4593, "num_word_context": 202.3354, "num_word_doc": 49.8159, "num_word_query": 31.9061, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4210.9775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2871, "query_norm": 1.4134, "queue_k_norm": 1.4163, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2381, "sent_len_1": 66.8288, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.6863, "stdk": 0.0472, "stdq": 0.0443, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.4892, "doc_norm": 1.413, "encoder_q-embeddings": 1949.7765, "encoder_q-layer.0": 1366.8376, "encoder_q-layer.1": 1488.5085, "encoder_q-layer.10": 1367.6011, "encoder_q-layer.11": 3492.3245, "encoder_q-layer.2": 1574.059, "encoder_q-layer.3": 1626.7561, "encoder_q-layer.4": 1606.9889, "encoder_q-layer.5": 1628.473, "encoder_q-layer.6": 1628.9666, "encoder_q-layer.7": 1629.7168, "encoder_q-layer.8": 1620.4318, "encoder_q-layer.9": 1282.1289, "epoch": 0.27, "inbatch_neg_score": 0.2836, "inbatch_pos_score": 0.938, "learning_rate": 4.011111111111111e-05, "loss": 3.4892, "norm_diff": 0.0179, "norm_loss": 0.0, "num_token_doc": 66.5578, "num_token_overlap": 15.8035, "num_token_query": 42.3956, "num_token_union": 68.3803, "num_word_context": 202.3359, "num_word_doc": 49.6628, "num_word_query": 32.0194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2786.66, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2849, "query_norm": 1.423, "queue_k_norm": 1.4146, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3956, "sent_len_1": 66.5578, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3663, "stdk": 0.0472, "stdq": 0.0451, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.4713, "doc_norm": 1.4221, "encoder_q-embeddings": 2472.696, "encoder_q-layer.0": 1792.6307, "encoder_q-layer.1": 2097.1025, "encoder_q-layer.10": 1423.2505, "encoder_q-layer.11": 3542.3843, "encoder_q-layer.2": 2006.0759, "encoder_q-layer.3": 1972.0854, "encoder_q-layer.4": 1981.6049, "encoder_q-layer.5": 2002.615, "encoder_q-layer.6": 1969.975, "encoder_q-layer.7": 1857.1311, "encoder_q-layer.8": 1858.865, "encoder_q-layer.9": 1344.8519, "epoch": 0.27, "inbatch_neg_score": 0.2686, "inbatch_pos_score": 0.9106, "learning_rate": 4.0055555555555554e-05, "loss": 3.4713, "norm_diff": 0.0191, "norm_loss": 0.0, "num_token_doc": 66.868, "num_token_overlap": 15.8343, "num_token_query": 42.429, "num_token_union": 68.5953, "num_word_context": 202.362, "num_word_doc": 49.8917, "num_word_query": 32.0395, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3161.6186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2698, "query_norm": 1.403, "queue_k_norm": 1.4147, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.429, "sent_len_1": 66.868, "sent_len_max_0": 127.995, "sent_len_max_1": 190.0412, "stdk": 0.0476, "stdq": 0.0445, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.4728, "doc_norm": 1.4082, "encoder_q-embeddings": 5294.2192, "encoder_q-layer.0": 3421.1523, "encoder_q-layer.1": 3506.4812, "encoder_q-layer.10": 1324.8866, "encoder_q-layer.11": 3375.3145, "encoder_q-layer.2": 4065.3442, "encoder_q-layer.3": 4999.0498, "encoder_q-layer.4": 4907.8496, "encoder_q-layer.5": 5393.6406, "encoder_q-layer.6": 5551.8711, "encoder_q-layer.7": 4472.6592, "encoder_q-layer.8": 3128.7432, "encoder_q-layer.9": 1641.207, "epoch": 0.27, "inbatch_neg_score": 0.2702, "inbatch_pos_score": 0.9009, "learning_rate": 4e-05, "loss": 3.4728, "norm_diff": 0.0199, "norm_loss": 0.0, "num_token_doc": 66.7893, "num_token_overlap": 15.8373, "num_token_query": 42.281, "num_token_union": 68.4278, "num_word_context": 202.2595, "num_word_doc": 49.8193, "num_word_query": 31.916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6179.4897, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2703, "query_norm": 1.3972, "queue_k_norm": 1.4139, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.281, "sent_len_1": 66.7893, "sent_len_max_0": 127.995, "sent_len_max_1": 189.49, "stdk": 0.0471, "stdq": 0.0444, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 50.9766, "active_queue_size": 16384.0, "cl_loss": 3.4448, "doc_norm": 1.4134, "encoder_q-embeddings": 2176.0947, "encoder_q-layer.0": 1550.2776, "encoder_q-layer.1": 1682.0269, "encoder_q-layer.10": 1580.5778, "encoder_q-layer.11": 3693.8142, "encoder_q-layer.2": 1875.6289, "encoder_q-layer.3": 1860.6769, "encoder_q-layer.4": 1787.1123, "encoder_q-layer.5": 1743.0442, "encoder_q-layer.6": 1723.8789, "encoder_q-layer.7": 1731.325, "encoder_q-layer.8": 1700.6633, "encoder_q-layer.9": 1449.08, "epoch": 0.27, "inbatch_neg_score": 0.2767, "inbatch_pos_score": 0.8833, "learning_rate": 3.9944444444444446e-05, "loss": 3.4448, "norm_diff": 0.0239, "norm_loss": 0.0, "num_token_doc": 66.8821, "num_token_overlap": 15.8188, "num_token_query": 42.3103, "num_token_union": 68.5787, "num_word_context": 202.377, "num_word_doc": 49.8875, "num_word_query": 31.9335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2992.3046, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2761, "query_norm": 1.3895, "queue_k_norm": 1.4137, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3103, "sent_len_1": 66.8821, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9888, "stdk": 0.0473, "stdq": 0.0435, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.4456, "doc_norm": 1.4155, "encoder_q-embeddings": 1521.7336, "encoder_q-layer.0": 1045.6334, "encoder_q-layer.1": 1128.0385, "encoder_q-layer.10": 1283.3717, "encoder_q-layer.11": 3336.8013, "encoder_q-layer.2": 1258.6539, "encoder_q-layer.3": 1261.0477, "encoder_q-layer.4": 1291.5361, "encoder_q-layer.5": 1277.1355, "encoder_q-layer.6": 1346.0037, "encoder_q-layer.7": 1253.0555, "encoder_q-layer.8": 1366.5192, "encoder_q-layer.9": 1214.6107, "epoch": 0.28, "inbatch_neg_score": 0.2707, "inbatch_pos_score": 0.9062, "learning_rate": 3.9888888888888895e-05, "loss": 3.4456, "norm_diff": 0.0221, "norm_loss": 0.0, "num_token_doc": 66.6061, "num_token_overlap": 15.8804, "num_token_query": 42.4158, "num_token_union": 68.4203, "num_word_context": 202.3125, "num_word_doc": 49.7025, "num_word_query": 32.0737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2409.9807, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2712, "query_norm": 1.3952, "queue_k_norm": 1.412, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4158, "sent_len_1": 66.6061, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0, "stdk": 0.0474, "stdq": 0.0441, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.4477, "doc_norm": 1.4136, "encoder_q-embeddings": 7559.1821, "encoder_q-layer.0": 5574.9043, "encoder_q-layer.1": 6417.2793, "encoder_q-layer.10": 1308.8805, "encoder_q-layer.11": 3309.4932, "encoder_q-layer.2": 7508.959, "encoder_q-layer.3": 8820.7197, "encoder_q-layer.4": 8540.2588, "encoder_q-layer.5": 7655.1855, "encoder_q-layer.6": 6889.5581, "encoder_q-layer.7": 5591.6084, "encoder_q-layer.8": 3912.6235, "encoder_q-layer.9": 2148.7334, "epoch": 0.28, "inbatch_neg_score": 0.2661, "inbatch_pos_score": 0.8721, "learning_rate": 3.983333333333333e-05, "loss": 3.4477, "norm_diff": 0.0395, "norm_loss": 0.0, "num_token_doc": 66.7562, "num_token_overlap": 15.8515, "num_token_query": 42.3491, "num_token_union": 68.4443, "num_word_context": 202.2426, "num_word_doc": 49.8349, "num_word_query": 31.9932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9452.8007, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2666, "query_norm": 1.3745, "queue_k_norm": 1.4143, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3491, "sent_len_1": 66.7562, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.5987, "stdk": 0.0473, "stdq": 0.0435, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4494, "doc_norm": 1.4134, "encoder_q-embeddings": 4454.8252, "encoder_q-layer.0": 3087.3616, "encoder_q-layer.1": 3591.3342, "encoder_q-layer.10": 1391.4788, "encoder_q-layer.11": 3404.8027, "encoder_q-layer.2": 4315.377, "encoder_q-layer.3": 4757.9019, "encoder_q-layer.4": 4752.147, "encoder_q-layer.5": 4434.4863, "encoder_q-layer.6": 4556.4263, "encoder_q-layer.7": 4022.1135, "encoder_q-layer.8": 3418.6829, "encoder_q-layer.9": 1838.5922, "epoch": 0.28, "inbatch_neg_score": 0.266, "inbatch_pos_score": 0.9277, "learning_rate": 3.977777777777778e-05, "loss": 3.4494, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 66.8667, "num_token_overlap": 15.793, "num_token_query": 42.3001, "num_token_union": 68.552, "num_word_context": 202.2667, "num_word_doc": 49.9084, "num_word_query": 31.9544, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5749.9481, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2659, "query_norm": 1.436, "queue_k_norm": 1.4128, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3001, "sent_len_1": 66.8667, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.0012, "stdk": 0.0473, "stdq": 0.0457, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.4709, "doc_norm": 1.418, "encoder_q-embeddings": 2395.3267, "encoder_q-layer.0": 1709.9318, "encoder_q-layer.1": 1850.0182, "encoder_q-layer.10": 1332.3073, "encoder_q-layer.11": 3391.9373, "encoder_q-layer.2": 1951.1187, "encoder_q-layer.3": 1953.593, "encoder_q-layer.4": 1940.8113, "encoder_q-layer.5": 1892.3068, "encoder_q-layer.6": 1928.4574, "encoder_q-layer.7": 1921.1112, "encoder_q-layer.8": 1755.5995, "encoder_q-layer.9": 1413.8339, "epoch": 0.28, "inbatch_neg_score": 0.261, "inbatch_pos_score": 0.8867, "learning_rate": 3.972222222222222e-05, "loss": 3.4709, "norm_diff": 0.0367, "norm_loss": 0.0, "num_token_doc": 66.9102, "num_token_overlap": 15.8532, "num_token_query": 42.3249, "num_token_union": 68.4899, "num_word_context": 202.3892, "num_word_doc": 49.8806, "num_word_query": 31.9887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3097.2461, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2595, "query_norm": 1.3814, "queue_k_norm": 1.412, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3249, "sent_len_1": 66.9102, "sent_len_max_0": 127.9887, "sent_len_max_1": 193.1825, "stdk": 0.0475, "stdq": 0.0441, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 49.3164, "active_queue_size": 16384.0, "cl_loss": 3.4392, "doc_norm": 1.4043, "encoder_q-embeddings": 11439.167, "encoder_q-layer.0": 8704.0293, "encoder_q-layer.1": 10001.3516, "encoder_q-layer.10": 1638.5094, "encoder_q-layer.11": 3592.0901, "encoder_q-layer.2": 13034.6582, "encoder_q-layer.3": 14328.4092, "encoder_q-layer.4": 16658.0176, "encoder_q-layer.5": 21325.7637, "encoder_q-layer.6": 14912.3613, "encoder_q-layer.7": 13647.3613, "encoder_q-layer.8": 9281.0576, "encoder_q-layer.9": 3845.9453, "epoch": 0.28, "inbatch_neg_score": 0.2496, "inbatch_pos_score": 0.8896, "learning_rate": 3.966666666666667e-05, "loss": 3.4392, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.7935, "num_token_overlap": 15.8769, "num_token_query": 42.3555, "num_token_union": 68.4645, "num_word_context": 202.2218, "num_word_doc": 49.8479, "num_word_query": 31.9925, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18010.2106, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2493, "query_norm": 1.4179, "queue_k_norm": 1.4116, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3555, "sent_len_1": 66.7935, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2038, "stdk": 0.047, "stdq": 0.0456, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.4509, "doc_norm": 1.4049, "encoder_q-embeddings": 18508.25, "encoder_q-layer.0": 12556.2842, "encoder_q-layer.1": 13387.5664, "encoder_q-layer.10": 1457.3768, "encoder_q-layer.11": 3452.6772, "encoder_q-layer.2": 13662.7236, "encoder_q-layer.3": 13014.0918, "encoder_q-layer.4": 13231.4932, "encoder_q-layer.5": 11771.5967, "encoder_q-layer.6": 14764.6689, "encoder_q-layer.7": 13441.6455, "encoder_q-layer.8": 10081.7578, "encoder_q-layer.9": 3934.3953, "epoch": 0.28, "inbatch_neg_score": 0.2441, "inbatch_pos_score": 0.8789, "learning_rate": 3.961111111111111e-05, "loss": 3.4509, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.5685, "num_token_overlap": 15.7497, "num_token_query": 42.1915, "num_token_union": 68.3435, "num_word_context": 202.148, "num_word_doc": 49.6434, "num_word_query": 31.8597, "postclip_grad_norm": 1.0, "preclip_grad_norm": 18892.0088, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2438, "query_norm": 1.4003, "queue_k_norm": 1.4101, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1915, "sent_len_1": 66.5685, "sent_len_max_0": 128.0, "sent_len_max_1": 188.59, "stdk": 0.0471, "stdq": 0.0448, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.4567, "doc_norm": 1.4023, "encoder_q-embeddings": 2245.5625, "encoder_q-layer.0": 1540.1119, "encoder_q-layer.1": 1700.5635, "encoder_q-layer.10": 1284.3604, "encoder_q-layer.11": 3156.5857, "encoder_q-layer.2": 1661.7235, "encoder_q-layer.3": 1625.1935, "encoder_q-layer.4": 1532.7435, "encoder_q-layer.5": 1419.6179, "encoder_q-layer.6": 1499.8342, "encoder_q-layer.7": 1574.0061, "encoder_q-layer.8": 1562.8566, "encoder_q-layer.9": 1309.0425, "epoch": 0.28, "inbatch_neg_score": 0.2526, "inbatch_pos_score": 0.873, "learning_rate": 3.9555555555555556e-05, "loss": 3.4567, "norm_diff": 0.0141, "norm_loss": 0.0, "num_token_doc": 66.7578, "num_token_overlap": 15.7742, "num_token_query": 42.1596, "num_token_union": 68.4003, "num_word_context": 202.3682, "num_word_doc": 49.848, "num_word_query": 31.8514, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2718.239, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2512, "query_norm": 1.4, "queue_k_norm": 1.4121, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1596, "sent_len_1": 66.7578, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8388, "stdk": 0.047, "stdq": 0.0447, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.4484, "doc_norm": 1.4113, "encoder_q-embeddings": 1605.8547, "encoder_q-layer.0": 1074.2432, "encoder_q-layer.1": 1152.1022, "encoder_q-layer.10": 1256.3137, "encoder_q-layer.11": 3245.6931, "encoder_q-layer.2": 1305.4581, "encoder_q-layer.3": 1327.5609, "encoder_q-layer.4": 1328.954, "encoder_q-layer.5": 1217.9176, "encoder_q-layer.6": 1296.1287, "encoder_q-layer.7": 1352.5206, "encoder_q-layer.8": 1454.7487, "encoder_q-layer.9": 1277.6162, "epoch": 0.28, "inbatch_neg_score": 0.2507, "inbatch_pos_score": 0.8896, "learning_rate": 3.9500000000000005e-05, "loss": 3.4484, "norm_diff": 0.0188, "norm_loss": 0.0, "num_token_doc": 66.7227, "num_token_overlap": 15.7386, "num_token_query": 42.0786, "num_token_union": 68.3142, "num_word_context": 201.8629, "num_word_doc": 49.7578, "num_word_query": 31.7543, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2420.7911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2498, "query_norm": 1.4021, "queue_k_norm": 1.4096, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.0786, "sent_len_1": 66.7227, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.06, "stdk": 0.0474, "stdq": 0.0445, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.4319, "doc_norm": 1.4123, "encoder_q-embeddings": 1124.9938, "encoder_q-layer.0": 798.2239, "encoder_q-layer.1": 889.9852, "encoder_q-layer.10": 627.907, "encoder_q-layer.11": 1497.7397, "encoder_q-layer.2": 981.4064, "encoder_q-layer.3": 986.637, "encoder_q-layer.4": 1010.8694, "encoder_q-layer.5": 995.1174, "encoder_q-layer.6": 1008.1914, "encoder_q-layer.7": 926.303, "encoder_q-layer.8": 887.9608, "encoder_q-layer.9": 655.9828, "epoch": 0.28, "inbatch_neg_score": 0.2545, "inbatch_pos_score": 0.8979, "learning_rate": 3.944444444444445e-05, "loss": 3.4319, "norm_diff": 0.0137, "norm_loss": 0.0, "num_token_doc": 66.5997, "num_token_overlap": 15.8082, "num_token_query": 42.2822, "num_token_union": 68.3731, "num_word_context": 202.2443, "num_word_doc": 49.6932, "num_word_query": 31.9116, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1471.5685, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2537, "query_norm": 1.4174, "queue_k_norm": 1.4104, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2822, "sent_len_1": 66.5997, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2675, "stdk": 0.0474, "stdq": 0.045, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 50.0977, "active_queue_size": 16384.0, "cl_loss": 3.409, "doc_norm": 1.3983, "encoder_q-embeddings": 851.3611, "encoder_q-layer.0": 573.6926, "encoder_q-layer.1": 616.3076, "encoder_q-layer.10": 634.4087, "encoder_q-layer.11": 1653.0054, "encoder_q-layer.2": 713.3794, "encoder_q-layer.3": 705.8307, "encoder_q-layer.4": 708.2374, "encoder_q-layer.5": 741.9642, "encoder_q-layer.6": 735.9173, "encoder_q-layer.7": 720.5793, "encoder_q-layer.8": 750.2729, "encoder_q-layer.9": 636.1734, "epoch": 0.28, "inbatch_neg_score": 0.2572, "inbatch_pos_score": 0.8657, "learning_rate": 3.938888888888889e-05, "loss": 3.409, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.7968, "num_token_overlap": 15.9189, "num_token_query": 42.4596, "num_token_union": 68.4744, "num_word_context": 202.3665, "num_word_doc": 49.837, "num_word_query": 32.0703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1261.0877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2563, "query_norm": 1.3994, "queue_k_norm": 1.4086, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4596, "sent_len_1": 66.7968, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1438, "stdk": 0.0469, "stdq": 0.0443, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.4141, "doc_norm": 1.4017, "encoder_q-embeddings": 1536.917, "encoder_q-layer.0": 1173.3455, "encoder_q-layer.1": 1293.1583, "encoder_q-layer.10": 776.6354, "encoder_q-layer.11": 1712.0431, "encoder_q-layer.2": 1462.9417, "encoder_q-layer.3": 1511.1949, "encoder_q-layer.4": 1519.2877, "encoder_q-layer.5": 1409.1628, "encoder_q-layer.6": 1435.5782, "encoder_q-layer.7": 1210.3683, "encoder_q-layer.8": 952.4085, "encoder_q-layer.9": 761.0309, "epoch": 0.29, "inbatch_neg_score": 0.2603, "inbatch_pos_score": 0.8613, "learning_rate": 3.933333333333333e-05, "loss": 3.4141, "norm_diff": 0.0222, "norm_loss": 0.0, "num_token_doc": 66.7928, "num_token_overlap": 15.8533, "num_token_query": 42.3185, "num_token_union": 68.4696, "num_word_context": 202.2429, "num_word_doc": 49.8632, "num_word_query": 31.9817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1959.773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.4211, "queue_k_norm": 1.4093, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3185, "sent_len_1": 66.7928, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7138, "stdk": 0.047, "stdq": 0.0452, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.4234, "doc_norm": 1.4112, "encoder_q-embeddings": 1178.8672, "encoder_q-layer.0": 885.8824, "encoder_q-layer.1": 878.0432, "encoder_q-layer.10": 619.814, "encoder_q-layer.11": 1434.9243, "encoder_q-layer.2": 993.6221, "encoder_q-layer.3": 1174.1748, "encoder_q-layer.4": 1210.9614, "encoder_q-layer.5": 1129.6659, "encoder_q-layer.6": 1042.5885, "encoder_q-layer.7": 1080.6259, "encoder_q-layer.8": 944.0194, "encoder_q-layer.9": 682.9798, "epoch": 0.29, "inbatch_neg_score": 0.2635, "inbatch_pos_score": 0.917, "learning_rate": 3.927777777777778e-05, "loss": 3.4234, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.6355, "num_token_overlap": 15.8811, "num_token_query": 42.4431, "num_token_union": 68.4338, "num_word_context": 202.0134, "num_word_doc": 49.6887, "num_word_query": 32.0645, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1603.4543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2629, "query_norm": 1.4194, "queue_k_norm": 1.4105, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4431, "sent_len_1": 66.6355, "sent_len_max_0": 128.0, "sent_len_max_1": 189.39, "stdk": 0.0474, "stdq": 0.0449, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.4511, "doc_norm": 1.4061, "encoder_q-embeddings": 870.4936, "encoder_q-layer.0": 575.3787, "encoder_q-layer.1": 627.559, "encoder_q-layer.10": 655.1125, "encoder_q-layer.11": 1594.0359, "encoder_q-layer.2": 721.3503, "encoder_q-layer.3": 820.5662, "encoder_q-layer.4": 802.6142, "encoder_q-layer.5": 774.7411, "encoder_q-layer.6": 759.0797, "encoder_q-layer.7": 741.3055, "encoder_q-layer.8": 831.0583, "encoder_q-layer.9": 639.3644, "epoch": 0.29, "inbatch_neg_score": 0.2604, "inbatch_pos_score": 0.8877, "learning_rate": 3.922222222222223e-05, "loss": 3.4511, "norm_diff": 0.0078, "norm_loss": 0.0, "num_token_doc": 66.9057, "num_token_overlap": 15.8038, "num_token_query": 42.2038, "num_token_union": 68.4624, "num_word_context": 202.302, "num_word_doc": 49.9166, "num_word_query": 31.8745, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1276.4437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2593, "query_norm": 1.4038, "queue_k_norm": 1.4112, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2038, "sent_len_1": 66.9057, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.345, "stdk": 0.0472, "stdq": 0.0446, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.4506, "doc_norm": 1.4058, "encoder_q-embeddings": 807.1686, "encoder_q-layer.0": 517.5856, "encoder_q-layer.1": 566.2764, "encoder_q-layer.10": 632.5014, "encoder_q-layer.11": 1525.663, "encoder_q-layer.2": 644.8419, "encoder_q-layer.3": 713.4255, "encoder_q-layer.4": 725.6965, "encoder_q-layer.5": 679.968, "encoder_q-layer.6": 715.7519, "encoder_q-layer.7": 723.5525, "encoder_q-layer.8": 725.4959, "encoder_q-layer.9": 626.9164, "epoch": 0.29, "inbatch_neg_score": 0.2557, "inbatch_pos_score": 0.8809, "learning_rate": 3.9166666666666665e-05, "loss": 3.4506, "norm_diff": 0.0158, "norm_loss": 0.0, "num_token_doc": 66.8132, "num_token_overlap": 15.7623, "num_token_query": 42.2195, "num_token_union": 68.4683, "num_word_context": 202.305, "num_word_doc": 49.857, "num_word_query": 31.874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1186.0492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2554, "query_norm": 1.3938, "queue_k_norm": 1.4096, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2195, "sent_len_1": 66.8132, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.9787, "stdk": 0.0472, "stdq": 0.0442, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.4234, "doc_norm": 1.4061, "encoder_q-embeddings": 1294.9636, "encoder_q-layer.0": 843.6025, "encoder_q-layer.1": 838.8359, "encoder_q-layer.10": 641.1145, "encoder_q-layer.11": 1504.9612, "encoder_q-layer.2": 935.3458, "encoder_q-layer.3": 931.6103, "encoder_q-layer.4": 956.3289, "encoder_q-layer.5": 996.039, "encoder_q-layer.6": 1059.9127, "encoder_q-layer.7": 1030.2483, "encoder_q-layer.8": 939.002, "encoder_q-layer.9": 678.5518, "epoch": 0.29, "inbatch_neg_score": 0.2641, "inbatch_pos_score": 0.8901, "learning_rate": 3.9111111111111115e-05, "loss": 3.4234, "norm_diff": 0.0192, "norm_loss": 0.0, "num_token_doc": 66.775, "num_token_overlap": 15.7826, "num_token_query": 42.1251, "num_token_union": 68.4197, "num_word_context": 202.1657, "num_word_doc": 49.8664, "num_word_query": 31.8069, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1535.0233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2644, "query_norm": 1.3979, "queue_k_norm": 1.4106, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1251, "sent_len_1": 66.775, "sent_len_max_0": 127.9925, "sent_len_max_1": 187.5525, "stdk": 0.0473, "stdq": 0.0443, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.4366, "doc_norm": 1.4132, "encoder_q-embeddings": 1157.4451, "encoder_q-layer.0": 804.0948, "encoder_q-layer.1": 868.0015, "encoder_q-layer.10": 662.5903, "encoder_q-layer.11": 1633.4299, "encoder_q-layer.2": 914.829, "encoder_q-layer.3": 895.6356, "encoder_q-layer.4": 841.8836, "encoder_q-layer.5": 854.8435, "encoder_q-layer.6": 952.4696, "encoder_q-layer.7": 958.726, "encoder_q-layer.8": 895.5627, "encoder_q-layer.9": 700.217, "epoch": 0.29, "inbatch_neg_score": 0.2609, "inbatch_pos_score": 0.8662, "learning_rate": 3.905555555555556e-05, "loss": 3.4366, "norm_diff": 0.0208, "norm_loss": 0.0, "num_token_doc": 66.8093, "num_token_overlap": 15.8208, "num_token_query": 42.4046, "num_token_union": 68.5687, "num_word_context": 202.4697, "num_word_doc": 49.844, "num_word_query": 32.0259, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1468.4002, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2603, "query_norm": 1.394, "queue_k_norm": 1.4108, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4046, "sent_len_1": 66.8093, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.25, "stdk": 0.0475, "stdq": 0.0444, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.4212, "doc_norm": 1.4039, "encoder_q-embeddings": 1200.2756, "encoder_q-layer.0": 840.4042, "encoder_q-layer.1": 941.8848, "encoder_q-layer.10": 624.7402, "encoder_q-layer.11": 1522.0728, "encoder_q-layer.2": 953.845, "encoder_q-layer.3": 1016.4305, "encoder_q-layer.4": 982.9066, "encoder_q-layer.5": 999.7695, "encoder_q-layer.6": 1122.9828, "encoder_q-layer.7": 1139.0841, "encoder_q-layer.8": 1088.1982, "encoder_q-layer.9": 765.2592, "epoch": 0.29, "inbatch_neg_score": 0.2648, "inbatch_pos_score": 0.8862, "learning_rate": 3.9000000000000006e-05, "loss": 3.4212, "norm_diff": 0.0181, "norm_loss": 0.0, "num_token_doc": 66.6789, "num_token_overlap": 15.8853, "num_token_query": 42.4756, "num_token_union": 68.4518, "num_word_context": 202.2232, "num_word_doc": 49.7364, "num_word_query": 32.0728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1574.6571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2637, "query_norm": 1.3976, "queue_k_norm": 1.4101, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4756, "sent_len_1": 66.6789, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3537, "stdk": 0.0472, "stdq": 0.0444, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 49.6094, "active_queue_size": 16384.0, "cl_loss": 3.4217, "doc_norm": 1.4069, "encoder_q-embeddings": 841.9222, "encoder_q-layer.0": 604.8436, "encoder_q-layer.1": 607.4921, "encoder_q-layer.10": 645.9324, "encoder_q-layer.11": 1643.6826, "encoder_q-layer.2": 659.9235, "encoder_q-layer.3": 708.9949, "encoder_q-layer.4": 721.6975, "encoder_q-layer.5": 684.4702, "encoder_q-layer.6": 726.7197, "encoder_q-layer.7": 812.0695, "encoder_q-layer.8": 837.6639, "encoder_q-layer.9": 671.1519, "epoch": 0.29, "inbatch_neg_score": 0.2587, "inbatch_pos_score": 0.8774, "learning_rate": 3.894444444444444e-05, "loss": 3.4217, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.8479, "num_token_overlap": 15.7956, "num_token_query": 42.2435, "num_token_union": 68.5305, "num_word_context": 202.5839, "num_word_doc": 49.8796, "num_word_query": 31.8891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1276.0536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2578, "query_norm": 1.3954, "queue_k_norm": 1.4121, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2435, "sent_len_1": 66.8479, "sent_len_max_0": 127.9788, "sent_len_max_1": 190.0375, "stdk": 0.0473, "stdq": 0.0446, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 50.7812, "active_queue_size": 16384.0, "cl_loss": 3.4291, "doc_norm": 1.4067, "encoder_q-embeddings": 764.8147, "encoder_q-layer.0": 517.192, "encoder_q-layer.1": 573.9947, "encoder_q-layer.10": 652.7324, "encoder_q-layer.11": 1647.8628, "encoder_q-layer.2": 613.2784, "encoder_q-layer.3": 618.1572, "encoder_q-layer.4": 593.7128, "encoder_q-layer.5": 581.8627, "encoder_q-layer.6": 609.9216, "encoder_q-layer.7": 635.4725, "encoder_q-layer.8": 737.6065, "encoder_q-layer.9": 640.6577, "epoch": 0.29, "inbatch_neg_score": 0.2494, "inbatch_pos_score": 0.8535, "learning_rate": 3.888888888888889e-05, "loss": 3.4291, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.6366, "num_token_overlap": 15.8091, "num_token_query": 42.2844, "num_token_union": 68.384, "num_word_context": 202.0063, "num_word_doc": 49.7382, "num_word_query": 31.9278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1169.6734, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2494, "query_norm": 1.3775, "queue_k_norm": 1.4127, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2844, "sent_len_1": 66.6366, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.0737, "stdk": 0.0473, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 27.0201, "dev_samples_per_second": 2.369, "dev_steps_per_second": 0.037, "epoch": 0.29, "step": 30000, "test_accuracy": 92.27294921875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4534218907356262, "test_doc_norm": 1.3764455318450928, "test_inbatch_neg_score": 0.57303386926651, "test_inbatch_pos_score": 1.4250423908233643, "test_loss": 0.4534218907356262, "test_loss_align": 1.1061375141143799, "test_loss_unif": 3.8583579063415527, "test_loss_unif_q@queue": 3.8583579063415527, "test_norm_diff": 0.04949508234858513, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2503693103790283, "test_query_norm": 1.425940752029419, "test_queue_k_norm": 1.4120254516601562, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04092820733785629, "test_stdq": 0.040675196796655655, "test_stdqueue_k": 0.047578200697898865, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.0201, "dev_samples_per_second": 2.369, "dev_steps_per_second": 0.037, "epoch": 0.29, "eval_beir-arguana_ndcg@10": 0.34503, "eval_beir-arguana_recall@10": 0.59957, "eval_beir-arguana_recall@100": 0.90256, "eval_beir-arguana_recall@20": 0.73329, "eval_beir-avg_ndcg@10": 0.34069674999999994, "eval_beir-avg_recall@10": 0.41182299999999994, "eval_beir-avg_recall@100": 0.5892308333333334, "eval_beir-avg_recall@20": 0.46755683333333337, "eval_beir-cqadupstack_ndcg@10": 0.2218975, "eval_beir-cqadupstack_recall@10": 0.30667, "eval_beir-cqadupstack_recall@100": 0.5346883333333332, "eval_beir-cqadupstack_recall@20": 0.37187833333333337, "eval_beir-fiqa_ndcg@10": 0.1988, "eval_beir-fiqa_recall@10": 0.25839, "eval_beir-fiqa_recall@100": 0.51246, "eval_beir-fiqa_recall@20": 0.33023, "eval_beir-nfcorpus_ndcg@10": 0.258, "eval_beir-nfcorpus_recall@10": 0.1299, "eval_beir-nfcorpus_recall@100": 0.25785, "eval_beir-nfcorpus_recall@20": 0.15639, "eval_beir-nq_ndcg@10": 0.24144, "eval_beir-nq_recall@10": 0.39366, "eval_beir-nq_recall@100": 0.72226, "eval_beir-nq_recall@20": 0.49343, "eval_beir-quora_ndcg@10": 0.71159, "eval_beir-quora_recall@10": 0.83212, "eval_beir-quora_recall@100": 0.956, "eval_beir-quora_recall@20": 0.88535, "eval_beir-scidocs_ndcg@10": 0.13235, "eval_beir-scidocs_recall@10": 0.13963, "eval_beir-scidocs_recall@100": 0.33037, "eval_beir-scidocs_recall@20": 0.18717, "eval_beir-scifact_ndcg@10": 0.56948, "eval_beir-scifact_recall@10": 0.73861, "eval_beir-scifact_recall@100": 0.881, "eval_beir-scifact_recall@20": 0.79178, "eval_beir-trec-covid_ndcg@10": 0.53278, "eval_beir-trec-covid_recall@10": 0.584, "eval_beir-trec-covid_recall@100": 0.3946, "eval_beir-trec-covid_recall@20": 0.521, "eval_beir-webis-touche2020_ndcg@10": 0.1956, "eval_beir-webis-touche2020_recall@10": 0.13568, "eval_beir-webis-touche2020_recall@100": 0.40052, "eval_beir-webis-touche2020_recall@20": 0.20505, "eval_senteval-avg_sts": 0.7454325652191629, "eval_senteval-sickr_spearman": 0.708459658803089, "eval_senteval-stsb_spearman": 0.7824054716352368, "step": 30000, "test_accuracy": 92.27294921875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4534218907356262, "test_doc_norm": 1.3764455318450928, "test_inbatch_neg_score": 0.57303386926651, "test_inbatch_pos_score": 1.4250423908233643, "test_loss": 0.4534218907356262, "test_loss_align": 1.1061375141143799, "test_loss_unif": 3.8583579063415527, "test_loss_unif_q@queue": 3.8583579063415527, "test_norm_diff": 0.04949508234858513, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2503693103790283, "test_query_norm": 1.425940752029419, "test_queue_k_norm": 1.4120254516601562, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04092820733785629, "test_stdq": 0.040675196796655655, "test_stdqueue_k": 0.047578200697898865, "test_stdqueue_q": 0.0 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.4218, "doc_norm": 1.406, "encoder_q-embeddings": 846.687, "encoder_q-layer.0": 599.5317, "encoder_q-layer.1": 633.647, "encoder_q-layer.10": 681.3382, "encoder_q-layer.11": 1466.8075, "encoder_q-layer.2": 691.5358, "encoder_q-layer.3": 703.1478, "encoder_q-layer.4": 689.3593, "encoder_q-layer.5": 675.7283, "encoder_q-layer.6": 710.4652, "encoder_q-layer.7": 698.8611, "encoder_q-layer.8": 757.706, "encoder_q-layer.9": 667.3508, "epoch": 0.29, "inbatch_neg_score": 0.2418, "inbatch_pos_score": 0.9248, "learning_rate": 3.883333333333333e-05, "loss": 3.4218, "norm_diff": 0.0185, "norm_loss": 0.0, "num_token_doc": 66.9721, "num_token_overlap": 15.8041, "num_token_query": 42.2915, "num_token_union": 68.6051, "num_word_context": 202.6147, "num_word_doc": 49.9669, "num_word_query": 31.9389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1180.3138, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2423, "query_norm": 1.4129, "queue_k_norm": 1.412, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2915, "sent_len_1": 66.9721, "sent_len_max_0": 127.985, "sent_len_max_1": 189.3613, "stdk": 0.0473, "stdq": 0.0459, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.4077, "doc_norm": 1.4128, "encoder_q-embeddings": 1589.8336, "encoder_q-layer.0": 1158.2317, "encoder_q-layer.1": 1284.6743, "encoder_q-layer.10": 717.8068, "encoder_q-layer.11": 1571.4391, "encoder_q-layer.2": 1586.2285, "encoder_q-layer.3": 1558.6488, "encoder_q-layer.4": 1609.1653, "encoder_q-layer.5": 1631.5497, "encoder_q-layer.6": 1548.9297, "encoder_q-layer.7": 1296.5409, "encoder_q-layer.8": 1180.5518, "encoder_q-layer.9": 853.4008, "epoch": 0.29, "inbatch_neg_score": 0.2397, "inbatch_pos_score": 0.8721, "learning_rate": 3.877777777777778e-05, "loss": 3.4077, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.9788, "num_token_overlap": 15.8821, "num_token_query": 42.4917, "num_token_union": 68.672, "num_word_context": 202.5798, "num_word_doc": 49.9561, "num_word_query": 32.0766, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2087.5886, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2401, "query_norm": 1.3582, "queue_k_norm": 1.4105, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4917, "sent_len_1": 66.9788, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7463, "stdk": 0.0476, "stdq": 0.0437, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.4024, "doc_norm": 1.409, "encoder_q-embeddings": 889.2188, "encoder_q-layer.0": 612.629, "encoder_q-layer.1": 655.4257, "encoder_q-layer.10": 622.1932, "encoder_q-layer.11": 1516.6406, "encoder_q-layer.2": 739.2556, "encoder_q-layer.3": 671.0726, "encoder_q-layer.4": 629.8968, "encoder_q-layer.5": 568.9963, "encoder_q-layer.6": 616.6765, "encoder_q-layer.7": 654.3973, "encoder_q-layer.8": 735.3975, "encoder_q-layer.9": 602.1194, "epoch": 0.3, "inbatch_neg_score": 0.2353, "inbatch_pos_score": 0.8525, "learning_rate": 3.8722222222222225e-05, "loss": 3.4024, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.7785, "num_token_overlap": 15.8409, "num_token_query": 42.3624, "num_token_union": 68.4733, "num_word_context": 202.3064, "num_word_doc": 49.8553, "num_word_query": 32.0153, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1185.4849, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2351, "query_norm": 1.3596, "queue_k_norm": 1.4107, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3624, "sent_len_1": 66.7785, "sent_len_max_0": 128.0, "sent_len_max_1": 187.5163, "stdk": 0.0475, "stdq": 0.0437, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.4267, "doc_norm": 1.415, "encoder_q-embeddings": 835.9138, "encoder_q-layer.0": 555.081, "encoder_q-layer.1": 581.1824, "encoder_q-layer.10": 641.3644, "encoder_q-layer.11": 1488.0856, "encoder_q-layer.2": 607.2679, "encoder_q-layer.3": 614.9821, "encoder_q-layer.4": 640.998, "encoder_q-layer.5": 664.916, "encoder_q-layer.6": 706.3799, "encoder_q-layer.7": 711.8899, "encoder_q-layer.8": 722.4698, "encoder_q-layer.9": 641.0499, "epoch": 0.3, "inbatch_neg_score": 0.2341, "inbatch_pos_score": 0.8608, "learning_rate": 3.866666666666667e-05, "loss": 3.4267, "norm_diff": 0.0445, "norm_loss": 0.0, "num_token_doc": 66.7546, "num_token_overlap": 15.853, "num_token_query": 42.288, "num_token_union": 68.4059, "num_word_context": 202.0061, "num_word_doc": 49.7392, "num_word_query": 31.9142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1163.65, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.233, "query_norm": 1.3705, "queue_k_norm": 1.4089, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.288, "sent_len_1": 66.7546, "sent_len_max_0": 127.9737, "sent_len_max_1": 191.6925, "stdk": 0.0477, "stdq": 0.0443, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.402, "doc_norm": 1.405, "encoder_q-embeddings": 687.4146, "encoder_q-layer.0": 455.1756, "encoder_q-layer.1": 481.5659, "encoder_q-layer.10": 623.8096, "encoder_q-layer.11": 1583.0204, "encoder_q-layer.2": 516.1031, "encoder_q-layer.3": 512.4432, "encoder_q-layer.4": 514.8663, "encoder_q-layer.5": 496.9205, "encoder_q-layer.6": 535.2261, "encoder_q-layer.7": 589.0101, "encoder_q-layer.8": 686.9412, "encoder_q-layer.9": 607.1109, "epoch": 0.3, "inbatch_neg_score": 0.2354, "inbatch_pos_score": 0.8696, "learning_rate": 3.8611111111111116e-05, "loss": 3.402, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.7407, "num_token_overlap": 15.8962, "num_token_query": 42.3309, "num_token_union": 68.3707, "num_word_context": 202.1317, "num_word_doc": 49.7998, "num_word_query": 31.9951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1059.323, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.234, "query_norm": 1.3838, "queue_k_norm": 1.4107, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3309, "sent_len_1": 66.7407, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.6387, "stdk": 0.0473, "stdq": 0.0448, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.4057, "doc_norm": 1.4069, "encoder_q-embeddings": 790.5898, "encoder_q-layer.0": 499.6115, "encoder_q-layer.1": 506.0468, "encoder_q-layer.10": 673.4287, "encoder_q-layer.11": 1546.1793, "encoder_q-layer.2": 550.6011, "encoder_q-layer.3": 560.9213, "encoder_q-layer.4": 550.7579, "encoder_q-layer.5": 560.8346, "encoder_q-layer.6": 611.3288, "encoder_q-layer.7": 646.2004, "encoder_q-layer.8": 710.4116, "encoder_q-layer.9": 619.4777, "epoch": 0.3, "inbatch_neg_score": 0.233, "inbatch_pos_score": 0.8584, "learning_rate": 3.855555555555556e-05, "loss": 3.4057, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.7479, "num_token_overlap": 15.8198, "num_token_query": 42.4274, "num_token_union": 68.5398, "num_word_context": 202.3675, "num_word_doc": 49.8466, "num_word_query": 32.0552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1110.756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.231, "query_norm": 1.3658, "queue_k_norm": 1.4092, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4274, "sent_len_1": 66.7479, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4338, "stdk": 0.0474, "stdq": 0.0441, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.4061, "doc_norm": 1.4025, "encoder_q-embeddings": 1781.3303, "encoder_q-layer.0": 1146.6995, "encoder_q-layer.1": 1357.7433, "encoder_q-layer.10": 619.6176, "encoder_q-layer.11": 1451.0222, "encoder_q-layer.2": 1700.4663, "encoder_q-layer.3": 1878.6401, "encoder_q-layer.4": 1860.9158, "encoder_q-layer.5": 1664.3801, "encoder_q-layer.6": 1616.6752, "encoder_q-layer.7": 1746.2802, "encoder_q-layer.8": 1397.9331, "encoder_q-layer.9": 794.5828, "epoch": 0.3, "inbatch_neg_score": 0.2332, "inbatch_pos_score": 0.8535, "learning_rate": 3.85e-05, "loss": 3.4061, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.7699, "num_token_overlap": 15.7772, "num_token_query": 42.2441, "num_token_union": 68.4848, "num_word_context": 202.25, "num_word_doc": 49.8049, "num_word_query": 31.9329, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2260.4972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2334, "query_norm": 1.3543, "queue_k_norm": 1.4085, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2441, "sent_len_1": 66.7699, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.0213, "stdk": 0.0473, "stdq": 0.0437, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.4062, "doc_norm": 1.4114, "encoder_q-embeddings": 693.9426, "encoder_q-layer.0": 466.0312, "encoder_q-layer.1": 480.0156, "encoder_q-layer.10": 717.3405, "encoder_q-layer.11": 1447.0349, "encoder_q-layer.2": 548.8135, "encoder_q-layer.3": 575.5172, "encoder_q-layer.4": 585.2315, "encoder_q-layer.5": 581.9706, "encoder_q-layer.6": 662.4944, "encoder_q-layer.7": 751.6934, "encoder_q-layer.8": 863.2811, "encoder_q-layer.9": 694.6695, "epoch": 0.3, "inbatch_neg_score": 0.2242, "inbatch_pos_score": 0.8521, "learning_rate": 3.844444444444444e-05, "loss": 3.4062, "norm_diff": 0.0367, "norm_loss": 0.0, "num_token_doc": 66.6658, "num_token_overlap": 15.8202, "num_token_query": 42.2206, "num_token_union": 68.3712, "num_word_context": 202.3454, "num_word_doc": 49.7524, "num_word_query": 31.8766, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1098.01, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2238, "query_norm": 1.3747, "queue_k_norm": 1.4095, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2206, "sent_len_1": 66.6658, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7038, "stdk": 0.0477, "stdq": 0.0447, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 51.5625, "active_queue_size": 16384.0, "cl_loss": 3.3876, "doc_norm": 1.4081, "encoder_q-embeddings": 1152.1086, "encoder_q-layer.0": 809.7286, "encoder_q-layer.1": 871.942, "encoder_q-layer.10": 620.7085, "encoder_q-layer.11": 1375.5372, "encoder_q-layer.2": 1092.4231, "encoder_q-layer.3": 1195.0023, "encoder_q-layer.4": 1185.2111, "encoder_q-layer.5": 1207.3855, "encoder_q-layer.6": 1086.9503, "encoder_q-layer.7": 1177.3584, "encoder_q-layer.8": 1188.4038, "encoder_q-layer.9": 779.2321, "epoch": 0.3, "inbatch_neg_score": 0.2219, "inbatch_pos_score": 0.8721, "learning_rate": 3.838888888888889e-05, "loss": 3.3876, "norm_diff": 0.0195, "norm_loss": 0.0, "num_token_doc": 66.6393, "num_token_overlap": 15.7688, "num_token_query": 42.2064, "num_token_union": 68.3475, "num_word_context": 201.8313, "num_word_doc": 49.7045, "num_word_query": 31.8663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1595.9553, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2228, "query_norm": 1.3887, "queue_k_norm": 1.4056, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2064, "sent_len_1": 66.6393, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9363, "stdk": 0.0476, "stdq": 0.0449, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.4034, "doc_norm": 1.4116, "encoder_q-embeddings": 9294.6738, "encoder_q-layer.0": 7553.3286, "encoder_q-layer.1": 9197.4072, "encoder_q-layer.10": 1294.623, "encoder_q-layer.11": 2967.5566, "encoder_q-layer.2": 10838.5059, "encoder_q-layer.3": 10624.4609, "encoder_q-layer.4": 11386.7783, "encoder_q-layer.5": 10226.6553, "encoder_q-layer.6": 9261.5576, "encoder_q-layer.7": 8170.5669, "encoder_q-layer.8": 7477.4834, "encoder_q-layer.9": 2573.5732, "epoch": 0.3, "inbatch_neg_score": 0.2212, "inbatch_pos_score": 0.8955, "learning_rate": 3.8333333333333334e-05, "loss": 3.4034, "norm_diff": 0.0267, "norm_loss": 0.0, "num_token_doc": 66.7309, "num_token_overlap": 15.8782, "num_token_query": 42.4992, "num_token_union": 68.5407, "num_word_context": 202.3547, "num_word_doc": 49.762, "num_word_query": 32.0959, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12492.3161, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2206, "query_norm": 1.3849, "queue_k_norm": 1.407, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4992, "sent_len_1": 66.7309, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.5075, "stdk": 0.0477, "stdq": 0.0448, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.4017, "doc_norm": 1.4139, "encoder_q-embeddings": 1736.2207, "encoder_q-layer.0": 1139.4407, "encoder_q-layer.1": 1249.5433, "encoder_q-layer.10": 1235.8915, "encoder_q-layer.11": 2849.636, "encoder_q-layer.2": 1393.0638, "encoder_q-layer.3": 1462.0658, "encoder_q-layer.4": 1671.8533, "encoder_q-layer.5": 1530.2194, "encoder_q-layer.6": 1655.02, "encoder_q-layer.7": 1571.8436, "encoder_q-layer.8": 1538.2246, "encoder_q-layer.9": 1259.517, "epoch": 0.3, "inbatch_neg_score": 0.2169, "inbatch_pos_score": 0.8511, "learning_rate": 3.827777777777778e-05, "loss": 3.4017, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.8296, "num_token_overlap": 15.8126, "num_token_query": 42.4257, "num_token_union": 68.5867, "num_word_context": 202.4983, "num_word_doc": 49.8656, "num_word_query": 32.0701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2439.5196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2155, "query_norm": 1.3787, "queue_k_norm": 1.4062, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4257, "sent_len_1": 66.8296, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5775, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3973, "doc_norm": 1.4025, "encoder_q-embeddings": 1721.9535, "encoder_q-layer.0": 1164.5079, "encoder_q-layer.1": 1204.061, "encoder_q-layer.10": 1219.7925, "encoder_q-layer.11": 2792.0398, "encoder_q-layer.2": 1236.6158, "encoder_q-layer.3": 1197.6511, "encoder_q-layer.4": 1221.5956, "encoder_q-layer.5": 1209.5887, "encoder_q-layer.6": 1279.4669, "encoder_q-layer.7": 1320.9851, "encoder_q-layer.8": 1407.4729, "encoder_q-layer.9": 1214.33, "epoch": 0.3, "inbatch_neg_score": 0.2177, "inbatch_pos_score": 0.8555, "learning_rate": 3.8222222222222226e-05, "loss": 3.3973, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.8201, "num_token_overlap": 15.8383, "num_token_query": 42.3337, "num_token_union": 68.4668, "num_word_context": 202.013, "num_word_doc": 49.8099, "num_word_query": 31.9775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2229.7143, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2185, "query_norm": 1.37, "queue_k_norm": 1.406, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3337, "sent_len_1": 66.8201, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.6488, "stdk": 0.0474, "stdq": 0.0442, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.4011, "doc_norm": 1.4112, "encoder_q-embeddings": 13156.6846, "encoder_q-layer.0": 8328.3848, "encoder_q-layer.1": 7394.8896, "encoder_q-layer.10": 1291.1926, "encoder_q-layer.11": 2779.0552, "encoder_q-layer.2": 7048.0723, "encoder_q-layer.3": 7334.3623, "encoder_q-layer.4": 7105.1763, "encoder_q-layer.5": 6390.6919, "encoder_q-layer.6": 5920.6548, "encoder_q-layer.7": 6199.0269, "encoder_q-layer.8": 5116.0522, "encoder_q-layer.9": 2417.1812, "epoch": 0.31, "inbatch_neg_score": 0.2145, "inbatch_pos_score": 0.8594, "learning_rate": 3.816666666666667e-05, "loss": 3.4011, "norm_diff": 0.0202, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 15.7793, "num_token_query": 42.2823, "num_token_union": 68.5271, "num_word_context": 202.1094, "num_word_doc": 49.8247, "num_word_query": 31.9221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11190.4913, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2152, "query_norm": 1.3911, "queue_k_norm": 1.4065, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2823, "sent_len_1": 66.7773, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.525, "stdk": 0.0478, "stdq": 0.0451, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.391, "doc_norm": 1.4072, "encoder_q-embeddings": 1677.2224, "encoder_q-layer.0": 1102.7642, "encoder_q-layer.1": 1219.7955, "encoder_q-layer.10": 1264.5857, "encoder_q-layer.11": 2859.0496, "encoder_q-layer.2": 1406.7174, "encoder_q-layer.3": 1482.4951, "encoder_q-layer.4": 1561.6113, "encoder_q-layer.5": 1437.7964, "encoder_q-layer.6": 1524.4935, "encoder_q-layer.7": 1613.3062, "encoder_q-layer.8": 1835.7709, "encoder_q-layer.9": 1359.4072, "epoch": 0.31, "inbatch_neg_score": 0.2244, "inbatch_pos_score": 0.8755, "learning_rate": 3.811111111111112e-05, "loss": 3.391, "norm_diff": 0.0214, "norm_loss": 0.0, "num_token_doc": 66.8222, "num_token_overlap": 15.829, "num_token_query": 42.2644, "num_token_union": 68.5334, "num_word_context": 202.5979, "num_word_doc": 49.8895, "num_word_query": 31.9246, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2442.0871, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2251, "query_norm": 1.3879, "queue_k_norm": 1.4047, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2644, "sent_len_1": 66.8222, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.1687, "stdk": 0.0477, "stdq": 0.045, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.373, "doc_norm": 1.4094, "encoder_q-embeddings": 1376.9116, "encoder_q-layer.0": 888.8945, "encoder_q-layer.1": 909.7233, "encoder_q-layer.10": 1317.1237, "encoder_q-layer.11": 2966.0984, "encoder_q-layer.2": 1000.72, "encoder_q-layer.3": 1056.6713, "encoder_q-layer.4": 1161.5779, "encoder_q-layer.5": 1086.4414, "encoder_q-layer.6": 1193.4545, "encoder_q-layer.7": 1321.1405, "encoder_q-layer.8": 1550.749, "encoder_q-layer.9": 1292.2793, "epoch": 0.31, "inbatch_neg_score": 0.2368, "inbatch_pos_score": 0.8843, "learning_rate": 3.805555555555555e-05, "loss": 3.373, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 66.8871, "num_token_overlap": 15.8387, "num_token_query": 42.3073, "num_token_union": 68.5173, "num_word_context": 202.0176, "num_word_doc": 49.8731, "num_word_query": 31.9425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2124.2175, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2375, "query_norm": 1.3753, "queue_k_norm": 1.4053, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3073, "sent_len_1": 66.8871, "sent_len_max_0": 127.995, "sent_len_max_1": 189.2975, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 51.1719, "active_queue_size": 16384.0, "cl_loss": 3.3943, "doc_norm": 1.3987, "encoder_q-embeddings": 1348.7185, "encoder_q-layer.0": 895.8344, "encoder_q-layer.1": 1005.0262, "encoder_q-layer.10": 1219.4882, "encoder_q-layer.11": 2837.0867, "encoder_q-layer.2": 1099.9382, "encoder_q-layer.3": 1174.2642, "encoder_q-layer.4": 1148.8761, "encoder_q-layer.5": 1133.9493, "encoder_q-layer.6": 1211.1456, "encoder_q-layer.7": 1272.6952, "encoder_q-layer.8": 1426.9657, "encoder_q-layer.9": 1195.3717, "epoch": 0.31, "inbatch_neg_score": 0.2333, "inbatch_pos_score": 0.8477, "learning_rate": 3.8e-05, "loss": 3.3943, "norm_diff": 0.0558, "norm_loss": 0.0, "num_token_doc": 66.9568, "num_token_overlap": 15.837, "num_token_query": 42.3253, "num_token_union": 68.5835, "num_word_context": 202.666, "num_word_doc": 49.9706, "num_word_query": 31.973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2063.056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2336, "query_norm": 1.3428, "queue_k_norm": 1.4055, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3253, "sent_len_1": 66.9568, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.1138, "stdk": 0.0473, "stdq": 0.0433, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.3931, "doc_norm": 1.4026, "encoder_q-embeddings": 2639.6985, "encoder_q-layer.0": 1824.9041, "encoder_q-layer.1": 2028.6539, "encoder_q-layer.10": 1231.3024, "encoder_q-layer.11": 2641.9106, "encoder_q-layer.2": 2230.3733, "encoder_q-layer.3": 2574.988, "encoder_q-layer.4": 2618.021, "encoder_q-layer.5": 2595.8062, "encoder_q-layer.6": 2422.2051, "encoder_q-layer.7": 2255.7275, "encoder_q-layer.8": 2046.7292, "encoder_q-layer.9": 1287.0713, "epoch": 0.31, "inbatch_neg_score": 0.2403, "inbatch_pos_score": 0.8838, "learning_rate": 3.7944444444444444e-05, "loss": 3.3931, "norm_diff": 0.0157, "norm_loss": 0.0, "num_token_doc": 66.6168, "num_token_overlap": 15.7622, "num_token_query": 42.2227, "num_token_union": 68.362, "num_word_context": 202.102, "num_word_doc": 49.7012, "num_word_query": 31.8684, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3345.472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2413, "query_norm": 1.3873, "queue_k_norm": 1.4038, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2227, "sent_len_1": 66.6168, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0225, "stdk": 0.0475, "stdq": 0.0448, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.365, "doc_norm": 1.4108, "encoder_q-embeddings": 1192.5007, "encoder_q-layer.0": 780.8544, "encoder_q-layer.1": 839.439, "encoder_q-layer.10": 1339.6605, "encoder_q-layer.11": 2848.5042, "encoder_q-layer.2": 944.1776, "encoder_q-layer.3": 989.6829, "encoder_q-layer.4": 1010.6871, "encoder_q-layer.5": 1020.2255, "encoder_q-layer.6": 1144.3201, "encoder_q-layer.7": 1309.1667, "encoder_q-layer.8": 1456.7507, "encoder_q-layer.9": 1296.2184, "epoch": 0.31, "inbatch_neg_score": 0.2369, "inbatch_pos_score": 0.8853, "learning_rate": 3.7888888888888894e-05, "loss": 3.365, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.5837, "num_token_overlap": 15.8531, "num_token_query": 42.3754, "num_token_union": 68.3751, "num_word_context": 201.9846, "num_word_doc": 49.7096, "num_word_query": 31.9856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1991.2446, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2375, "query_norm": 1.4038, "queue_k_norm": 1.4054, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3754, "sent_len_1": 66.5837, "sent_len_max_0": 127.995, "sent_len_max_1": 190.0, "stdk": 0.0478, "stdq": 0.0453, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 50.3906, "active_queue_size": 16384.0, "cl_loss": 3.3836, "doc_norm": 1.4029, "encoder_q-embeddings": 3874.3396, "encoder_q-layer.0": 2687.9294, "encoder_q-layer.1": 2981.6133, "encoder_q-layer.10": 1313.9053, "encoder_q-layer.11": 2899.0276, "encoder_q-layer.2": 3529.6086, "encoder_q-layer.3": 3494.0715, "encoder_q-layer.4": 4059.5491, "encoder_q-layer.5": 2810.4368, "encoder_q-layer.6": 2423.4971, "encoder_q-layer.7": 2054.9136, "encoder_q-layer.8": 1864.4385, "encoder_q-layer.9": 1305.522, "epoch": 0.31, "inbatch_neg_score": 0.2333, "inbatch_pos_score": 0.8628, "learning_rate": 3.7833333333333336e-05, "loss": 3.3836, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.7075, "num_token_overlap": 15.7455, "num_token_query": 42.2746, "num_token_union": 68.5197, "num_word_context": 202.2792, "num_word_doc": 49.8163, "num_word_query": 31.9638, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4386.4486, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2327, "query_norm": 1.3985, "queue_k_norm": 1.4034, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2746, "sent_len_1": 66.7075, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.5137, "stdk": 0.0475, "stdq": 0.0451, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3423, "doc_norm": 1.4024, "encoder_q-embeddings": 1521.865, "encoder_q-layer.0": 1005.5906, "encoder_q-layer.1": 1113.8693, "encoder_q-layer.10": 1258.4965, "encoder_q-layer.11": 2840.1548, "encoder_q-layer.2": 1351.6504, "encoder_q-layer.3": 1431.3224, "encoder_q-layer.4": 1290.967, "encoder_q-layer.5": 1216.4408, "encoder_q-layer.6": 1409.4788, "encoder_q-layer.7": 1463.7073, "encoder_q-layer.8": 1501.0192, "encoder_q-layer.9": 1249.376, "epoch": 0.31, "inbatch_neg_score": 0.2319, "inbatch_pos_score": 0.8701, "learning_rate": 3.777777777777778e-05, "loss": 3.3423, "norm_diff": 0.0253, "norm_loss": 0.0, "num_token_doc": 66.8997, "num_token_overlap": 15.8839, "num_token_query": 42.3761, "num_token_union": 68.5754, "num_word_context": 202.2749, "num_word_doc": 49.9144, "num_word_query": 32.0351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2257.3094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2332, "query_norm": 1.3788, "queue_k_norm": 1.4064, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3761, "sent_len_1": 66.8997, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8575, "stdk": 0.0474, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3586, "doc_norm": 1.4039, "encoder_q-embeddings": 2004.9486, "encoder_q-layer.0": 1363.5801, "encoder_q-layer.1": 1427.4446, "encoder_q-layer.10": 1231.5731, "encoder_q-layer.11": 2816.1431, "encoder_q-layer.2": 1658.6587, "encoder_q-layer.3": 1748.7184, "encoder_q-layer.4": 1859.9438, "encoder_q-layer.5": 1739.2911, "encoder_q-layer.6": 1697.1874, "encoder_q-layer.7": 1589.2511, "encoder_q-layer.8": 1711.5845, "encoder_q-layer.9": 1358.1986, "epoch": 0.31, "inbatch_neg_score": 0.2407, "inbatch_pos_score": 0.8848, "learning_rate": 3.772222222222223e-05, "loss": 3.3586, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.9737, "num_token_overlap": 15.7667, "num_token_query": 42.0847, "num_token_union": 68.475, "num_word_context": 202.5759, "num_word_doc": 49.9166, "num_word_query": 31.7757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2631.9944, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2402, "query_norm": 1.4037, "queue_k_norm": 1.4053, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.0847, "sent_len_1": 66.9737, "sent_len_max_0": 127.985, "sent_len_max_1": 191.3862, "stdk": 0.0475, "stdq": 0.0452, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3499, "doc_norm": 1.4042, "encoder_q-embeddings": 3571.4507, "encoder_q-layer.0": 2686.7224, "encoder_q-layer.1": 2644.0398, "encoder_q-layer.10": 1255.7948, "encoder_q-layer.11": 2842.7527, "encoder_q-layer.2": 2966.6443, "encoder_q-layer.3": 2845.5244, "encoder_q-layer.4": 3091.9231, "encoder_q-layer.5": 3205.1733, "encoder_q-layer.6": 2889.6204, "encoder_q-layer.7": 2489.1833, "encoder_q-layer.8": 2179.687, "encoder_q-layer.9": 1425.4407, "epoch": 0.31, "inbatch_neg_score": 0.2497, "inbatch_pos_score": 0.8945, "learning_rate": 3.766666666666667e-05, "loss": 3.3499, "norm_diff": 0.0072, "norm_loss": 0.0, "num_token_doc": 67.0713, "num_token_overlap": 15.875, "num_token_query": 42.2758, "num_token_union": 68.5957, "num_word_context": 202.4984, "num_word_doc": 50.0095, "num_word_query": 31.9209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4011.3755, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2476, "query_norm": 1.4037, "queue_k_norm": 1.4064, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2758, "sent_len_1": 67.0713, "sent_len_max_0": 127.98, "sent_len_max_1": 191.2612, "stdk": 0.0474, "stdq": 0.045, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3883, "doc_norm": 1.4062, "encoder_q-embeddings": 1385.2924, "encoder_q-layer.0": 901.7119, "encoder_q-layer.1": 963.7345, "encoder_q-layer.10": 1192.0914, "encoder_q-layer.11": 2718.9507, "encoder_q-layer.2": 1031.595, "encoder_q-layer.3": 1070.1088, "encoder_q-layer.4": 1108.0034, "encoder_q-layer.5": 1069.5988, "encoder_q-layer.6": 1173.7881, "encoder_q-layer.7": 1421.6348, "encoder_q-layer.8": 1511.5614, "encoder_q-layer.9": 1279.2683, "epoch": 0.32, "inbatch_neg_score": 0.2411, "inbatch_pos_score": 0.8813, "learning_rate": 3.761111111111111e-05, "loss": 3.3883, "norm_diff": 0.0117, "norm_loss": 0.0, "num_token_doc": 66.8553, "num_token_overlap": 15.832, "num_token_query": 42.3776, "num_token_union": 68.5541, "num_word_context": 202.3485, "num_word_doc": 49.8456, "num_word_query": 32.0189, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2026.1798, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2416, "query_norm": 1.3968, "queue_k_norm": 1.4045, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3776, "sent_len_1": 66.8553, "sent_len_max_0": 128.0, "sent_len_max_1": 192.0337, "stdk": 0.0476, "stdq": 0.0449, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.3598, "doc_norm": 1.4126, "encoder_q-embeddings": 1243.0514, "encoder_q-layer.0": 843.2324, "encoder_q-layer.1": 895.1516, "encoder_q-layer.10": 1154.9843, "encoder_q-layer.11": 2702.717, "encoder_q-layer.2": 1020.2264, "encoder_q-layer.3": 1092.5734, "encoder_q-layer.4": 1113.6293, "encoder_q-layer.5": 1055.2626, "encoder_q-layer.6": 1054.1608, "encoder_q-layer.7": 1139.4889, "encoder_q-layer.8": 1307.2184, "encoder_q-layer.9": 1217.9509, "epoch": 0.32, "inbatch_neg_score": 0.2467, "inbatch_pos_score": 0.8701, "learning_rate": 3.7555555555555554e-05, "loss": 3.3598, "norm_diff": 0.047, "norm_loss": 0.0, "num_token_doc": 66.7796, "num_token_overlap": 15.8891, "num_token_query": 42.5726, "num_token_union": 68.5894, "num_word_context": 202.1176, "num_word_doc": 49.7992, "num_word_query": 32.1549, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1970.6144, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2468, "query_norm": 1.3655, "queue_k_norm": 1.4056, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5726, "sent_len_1": 66.7796, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6725, "stdk": 0.0478, "stdq": 0.0435, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3621, "doc_norm": 1.4027, "encoder_q-embeddings": 1275.8154, "encoder_q-layer.0": 853.5844, "encoder_q-layer.1": 897.5094, "encoder_q-layer.10": 1333.9697, "encoder_q-layer.11": 2914.8755, "encoder_q-layer.2": 1023.1189, "encoder_q-layer.3": 1104.8527, "encoder_q-layer.4": 1066.9744, "encoder_q-layer.5": 1096.6155, "encoder_q-layer.6": 1218.3667, "encoder_q-layer.7": 1253.9651, "encoder_q-layer.8": 1472.5651, "encoder_q-layer.9": 1333.796, "epoch": 0.32, "inbatch_neg_score": 0.2365, "inbatch_pos_score": 0.874, "learning_rate": 3.7500000000000003e-05, "loss": 3.3621, "norm_diff": 0.0286, "norm_loss": 0.0, "num_token_doc": 66.8965, "num_token_overlap": 15.8292, "num_token_query": 42.3336, "num_token_union": 68.5943, "num_word_context": 202.6076, "num_word_doc": 49.8838, "num_word_query": 31.9673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2068.1408, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2373, "query_norm": 1.3741, "queue_k_norm": 1.4079, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3336, "sent_len_1": 66.8965, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4812, "stdk": 0.0474, "stdq": 0.0443, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.3653, "doc_norm": 1.4052, "encoder_q-embeddings": 1609.7145, "encoder_q-layer.0": 1065.7899, "encoder_q-layer.1": 1216.8411, "encoder_q-layer.10": 1306.6069, "encoder_q-layer.11": 3039.5166, "encoder_q-layer.2": 1305.1641, "encoder_q-layer.3": 1349.1459, "encoder_q-layer.4": 1373.7522, "encoder_q-layer.5": 1311.9055, "encoder_q-layer.6": 1350.8447, "encoder_q-layer.7": 1435.7145, "encoder_q-layer.8": 1539.9836, "encoder_q-layer.9": 1314.3052, "epoch": 0.32, "inbatch_neg_score": 0.2476, "inbatch_pos_score": 0.8506, "learning_rate": 3.7444444444444446e-05, "loss": 3.3653, "norm_diff": 0.0393, "norm_loss": 0.0, "num_token_doc": 66.6762, "num_token_overlap": 15.8402, "num_token_query": 42.4339, "num_token_union": 68.4776, "num_word_context": 202.1352, "num_word_doc": 49.7717, "num_word_query": 32.0726, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2369.1309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2471, "query_norm": 1.3659, "queue_k_norm": 1.4066, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4339, "sent_len_1": 66.6762, "sent_len_max_0": 128.0, "sent_len_max_1": 187.81, "stdk": 0.0475, "stdq": 0.0439, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 50.293, "active_queue_size": 16384.0, "cl_loss": 3.3671, "doc_norm": 1.4023, "encoder_q-embeddings": 3136.428, "encoder_q-layer.0": 2335.1382, "encoder_q-layer.1": 2944.7988, "encoder_q-layer.10": 1345.1964, "encoder_q-layer.11": 3088.3088, "encoder_q-layer.2": 2890.7612, "encoder_q-layer.3": 2776.4678, "encoder_q-layer.4": 3068.4148, "encoder_q-layer.5": 2474.364, "encoder_q-layer.6": 2563.4128, "encoder_q-layer.7": 2325.7178, "encoder_q-layer.8": 2197.2546, "encoder_q-layer.9": 1473.4945, "epoch": 0.32, "inbatch_neg_score": 0.2451, "inbatch_pos_score": 0.8711, "learning_rate": 3.738888888888889e-05, "loss": 3.3671, "norm_diff": 0.0235, "norm_loss": 0.0, "num_token_doc": 66.8003, "num_token_overlap": 15.8176, "num_token_query": 42.4228, "num_token_union": 68.5831, "num_word_context": 202.3215, "num_word_doc": 49.8603, "num_word_query": 32.054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3863.1539, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.245, "query_norm": 1.3789, "queue_k_norm": 1.4061, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4228, "sent_len_1": 66.8003, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.7512, "stdk": 0.0474, "stdq": 0.0445, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3791, "doc_norm": 1.4094, "encoder_q-embeddings": 2106.5686, "encoder_q-layer.0": 1372.1769, "encoder_q-layer.1": 1558.187, "encoder_q-layer.10": 1324.9364, "encoder_q-layer.11": 3142.9807, "encoder_q-layer.2": 1686.2363, "encoder_q-layer.3": 1727.9447, "encoder_q-layer.4": 1612.5226, "encoder_q-layer.5": 1789.9727, "encoder_q-layer.6": 2235.1577, "encoder_q-layer.7": 2547.897, "encoder_q-layer.8": 2925.3352, "encoder_q-layer.9": 1983.92, "epoch": 0.32, "inbatch_neg_score": 0.2423, "inbatch_pos_score": 0.8843, "learning_rate": 3.733333333333334e-05, "loss": 3.3791, "norm_diff": 0.0223, "norm_loss": 0.0, "num_token_doc": 66.8562, "num_token_overlap": 15.8233, "num_token_query": 42.3759, "num_token_union": 68.6029, "num_word_context": 202.4927, "num_word_doc": 49.8514, "num_word_query": 32.0172, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3142.0854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2435, "query_norm": 1.3936, "queue_k_norm": 1.4057, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3759, "sent_len_1": 66.8562, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.7875, "stdk": 0.0476, "stdq": 0.0451, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3798, "doc_norm": 1.4116, "encoder_q-embeddings": 1407.2681, "encoder_q-layer.0": 888.1941, "encoder_q-layer.1": 928.396, "encoder_q-layer.10": 1329.9767, "encoder_q-layer.11": 3075.6357, "encoder_q-layer.2": 1018.3319, "encoder_q-layer.3": 1039.6593, "encoder_q-layer.4": 1091.5452, "encoder_q-layer.5": 1148.9717, "encoder_q-layer.6": 1223.915, "encoder_q-layer.7": 1314.0779, "encoder_q-layer.8": 1513.6443, "encoder_q-layer.9": 1345.849, "epoch": 0.32, "inbatch_neg_score": 0.2391, "inbatch_pos_score": 0.8628, "learning_rate": 3.727777777777778e-05, "loss": 3.3798, "norm_diff": 0.056, "norm_loss": 0.0, "num_token_doc": 66.6764, "num_token_overlap": 15.7085, "num_token_query": 42.1185, "num_token_union": 68.3894, "num_word_context": 202.3521, "num_word_doc": 49.7718, "num_word_query": 31.8109, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2139.5135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2373, "query_norm": 1.3556, "queue_k_norm": 1.4052, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1185, "sent_len_1": 66.6764, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8175, "stdk": 0.0477, "stdq": 0.0436, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.3557, "doc_norm": 1.4084, "encoder_q-embeddings": 4467.3384, "encoder_q-layer.0": 3050.0547, "encoder_q-layer.1": 3150.1665, "encoder_q-layer.10": 2422.7939, "encoder_q-layer.11": 5569.6689, "encoder_q-layer.2": 3808.4124, "encoder_q-layer.3": 3960.0664, "encoder_q-layer.4": 4042.7971, "encoder_q-layer.5": 3801.9619, "encoder_q-layer.6": 3354.7407, "encoder_q-layer.7": 3494.9099, "encoder_q-layer.8": 3199.6492, "encoder_q-layer.9": 2571.2354, "epoch": 0.32, "inbatch_neg_score": 0.2235, "inbatch_pos_score": 0.8989, "learning_rate": 3.722222222222222e-05, "loss": 3.3557, "norm_diff": 0.0177, "norm_loss": 0.0, "num_token_doc": 66.7763, "num_token_overlap": 15.7783, "num_token_query": 42.1071, "num_token_union": 68.3671, "num_word_context": 201.9818, "num_word_doc": 49.802, "num_word_query": 31.7921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5548.0073, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2236, "query_norm": 1.3934, "queue_k_norm": 1.4071, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1071, "sent_len_1": 66.7763, "sent_len_max_0": 127.9862, "sent_len_max_1": 191.1175, "stdk": 0.0476, "stdq": 0.0455, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3586, "doc_norm": 1.4059, "encoder_q-embeddings": 1462.8099, "encoder_q-layer.0": 959.183, "encoder_q-layer.1": 987.7112, "encoder_q-layer.10": 1307.7125, "encoder_q-layer.11": 2837.8574, "encoder_q-layer.2": 1130.9036, "encoder_q-layer.3": 1206.8074, "encoder_q-layer.4": 1292.0903, "encoder_q-layer.5": 1216.8407, "encoder_q-layer.6": 1488.6075, "encoder_q-layer.7": 1554.0984, "encoder_q-layer.8": 1846.1296, "encoder_q-layer.9": 1392.0708, "epoch": 0.32, "inbatch_neg_score": 0.2182, "inbatch_pos_score": 0.8662, "learning_rate": 3.7166666666666664e-05, "loss": 3.3586, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.636, "num_token_overlap": 15.8745, "num_token_query": 42.3012, "num_token_union": 68.3449, "num_word_context": 202.0857, "num_word_doc": 49.7278, "num_word_query": 31.9453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2246.4485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.218, "query_norm": 1.3691, "queue_k_norm": 1.4061, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3012, "sent_len_1": 66.636, "sent_len_max_0": 128.0, "sent_len_max_1": 188.495, "stdk": 0.0475, "stdq": 0.0448, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.3518, "doc_norm": 1.4096, "encoder_q-embeddings": 2376.4976, "encoder_q-layer.0": 1844.7543, "encoder_q-layer.1": 1862.3694, "encoder_q-layer.10": 1375.2073, "encoder_q-layer.11": 2874.665, "encoder_q-layer.2": 2185.8645, "encoder_q-layer.3": 2221.0452, "encoder_q-layer.4": 2557.8679, "encoder_q-layer.5": 2564.4084, "encoder_q-layer.6": 2391.5183, "encoder_q-layer.7": 2362.0952, "encoder_q-layer.8": 2294.54, "encoder_q-layer.9": 1600.0945, "epoch": 0.32, "inbatch_neg_score": 0.2193, "inbatch_pos_score": 0.8628, "learning_rate": 3.7111111111111113e-05, "loss": 3.3518, "norm_diff": 0.0295, "norm_loss": 0.0, "num_token_doc": 66.912, "num_token_overlap": 15.8874, "num_token_query": 42.4285, "num_token_union": 68.6405, "num_word_context": 202.5354, "num_word_doc": 49.9218, "num_word_query": 32.0575, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3273.4485, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2192, "query_norm": 1.3801, "queue_k_norm": 1.4061, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4285, "sent_len_1": 66.912, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7413, "stdk": 0.0477, "stdq": 0.0452, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3706, "doc_norm": 1.4024, "encoder_q-embeddings": 1142.6819, "encoder_q-layer.0": 721.9733, "encoder_q-layer.1": 744.5578, "encoder_q-layer.10": 1133.8746, "encoder_q-layer.11": 2695.7444, "encoder_q-layer.2": 838.5396, "encoder_q-layer.3": 851.0399, "encoder_q-layer.4": 875.4147, "encoder_q-layer.5": 853.9277, "encoder_q-layer.6": 985.342, "encoder_q-layer.7": 1064.8921, "encoder_q-layer.8": 1206.6243, "encoder_q-layer.9": 1123.5055, "epoch": 0.33, "inbatch_neg_score": 0.2161, "inbatch_pos_score": 0.8687, "learning_rate": 3.705555555555556e-05, "loss": 3.3706, "norm_diff": 0.0482, "norm_loss": 0.0, "num_token_doc": 66.7729, "num_token_overlap": 15.8339, "num_token_query": 42.5212, "num_token_union": 68.5525, "num_word_context": 202.6249, "num_word_doc": 49.8359, "num_word_query": 32.1428, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1828.5021, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2159, "query_norm": 1.3542, "queue_k_norm": 1.4039, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5212, "sent_len_1": 66.7729, "sent_len_max_0": 128.0, "sent_len_max_1": 190.26, "stdk": 0.0475, "stdq": 0.044, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3487, "doc_norm": 1.4066, "encoder_q-embeddings": 1313.9258, "encoder_q-layer.0": 842.7731, "encoder_q-layer.1": 921.7623, "encoder_q-layer.10": 1472.99, "encoder_q-layer.11": 2864.251, "encoder_q-layer.2": 1045.0813, "encoder_q-layer.3": 1106.9181, "encoder_q-layer.4": 1161.2617, "encoder_q-layer.5": 1178.6771, "encoder_q-layer.6": 1365.6284, "encoder_q-layer.7": 1474.8782, "encoder_q-layer.8": 1631.1746, "encoder_q-layer.9": 1281.9727, "epoch": 0.33, "inbatch_neg_score": 0.2067, "inbatch_pos_score": 0.8521, "learning_rate": 3.7e-05, "loss": 3.3487, "norm_diff": 0.0412, "norm_loss": 0.0, "num_token_doc": 67.1349, "num_token_overlap": 15.8214, "num_token_query": 42.1581, "num_token_union": 68.5558, "num_word_context": 202.8193, "num_word_doc": 50.1011, "num_word_query": 31.8079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2137.4455, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2064, "query_norm": 1.3655, "queue_k_norm": 1.4049, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1581, "sent_len_1": 67.1349, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.52, "stdk": 0.0476, "stdq": 0.0449, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3534, "doc_norm": 1.4023, "encoder_q-embeddings": 1428.7462, "encoder_q-layer.0": 996.6017, "encoder_q-layer.1": 1049.3457, "encoder_q-layer.10": 1208.2089, "encoder_q-layer.11": 2691.8027, "encoder_q-layer.2": 1159.4465, "encoder_q-layer.3": 1200.964, "encoder_q-layer.4": 1271.4373, "encoder_q-layer.5": 1301.1403, "encoder_q-layer.6": 1356.4757, "encoder_q-layer.7": 1439.8612, "encoder_q-layer.8": 1569.0607, "encoder_q-layer.9": 1239.3589, "epoch": 0.33, "inbatch_neg_score": 0.2172, "inbatch_pos_score": 0.8613, "learning_rate": 3.694444444444445e-05, "loss": 3.3534, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.8319, "num_token_overlap": 15.8926, "num_token_query": 42.4555, "num_token_union": 68.504, "num_word_context": 202.3992, "num_word_doc": 49.8292, "num_word_query": 32.0703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2139.7872, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2175, "query_norm": 1.3775, "queue_k_norm": 1.4046, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4555, "sent_len_1": 66.8319, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.87, "stdk": 0.0476, "stdq": 0.0448, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3472, "doc_norm": 1.3964, "encoder_q-embeddings": 3881.1462, "encoder_q-layer.0": 2565.3403, "encoder_q-layer.1": 2909.615, "encoder_q-layer.10": 1237.2012, "encoder_q-layer.11": 2886.8677, "encoder_q-layer.2": 3258.5957, "encoder_q-layer.3": 3143.2944, "encoder_q-layer.4": 2987.3071, "encoder_q-layer.5": 2817.2219, "encoder_q-layer.6": 2740.0852, "encoder_q-layer.7": 2693.3464, "encoder_q-layer.8": 2556.1724, "encoder_q-layer.9": 1482.2382, "epoch": 0.33, "inbatch_neg_score": 0.2226, "inbatch_pos_score": 0.8501, "learning_rate": 3.688888888888889e-05, "loss": 3.3472, "norm_diff": 0.0284, "norm_loss": 0.0, "num_token_doc": 66.8846, "num_token_overlap": 15.8183, "num_token_query": 42.3254, "num_token_union": 68.615, "num_word_context": 202.6391, "num_word_doc": 49.8753, "num_word_query": 32.0008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4288.11, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2205, "query_norm": 1.368, "queue_k_norm": 1.4051, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3254, "sent_len_1": 66.8846, "sent_len_max_0": 127.99, "sent_len_max_1": 190.3475, "stdk": 0.0472, "stdq": 0.0443, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 51.0742, "active_queue_size": 16384.0, "cl_loss": 3.345, "doc_norm": 1.4071, "encoder_q-embeddings": 1381.5244, "encoder_q-layer.0": 913.5263, "encoder_q-layer.1": 965.7603, "encoder_q-layer.10": 1257.7507, "encoder_q-layer.11": 2817.0879, "encoder_q-layer.2": 1057.0756, "encoder_q-layer.3": 1116.2938, "encoder_q-layer.4": 1135.9434, "encoder_q-layer.5": 1138.0736, "encoder_q-layer.6": 1221.6776, "encoder_q-layer.7": 1289.3389, "encoder_q-layer.8": 1400.8174, "encoder_q-layer.9": 1255.3927, "epoch": 0.33, "inbatch_neg_score": 0.224, "inbatch_pos_score": 0.8633, "learning_rate": 3.683333333333334e-05, "loss": 3.345, "norm_diff": 0.0153, "norm_loss": 0.0, "num_token_doc": 66.8352, "num_token_overlap": 15.8113, "num_token_query": 42.3185, "num_token_union": 68.5477, "num_word_context": 202.3797, "num_word_doc": 49.879, "num_word_query": 31.9773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2088.9523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2242, "query_norm": 1.3941, "queue_k_norm": 1.4029, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3185, "sent_len_1": 66.8352, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.985, "stdk": 0.0477, "stdq": 0.0451, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.3443, "doc_norm": 1.4016, "encoder_q-embeddings": 1515.2898, "encoder_q-layer.0": 998.1298, "encoder_q-layer.1": 1059.6943, "encoder_q-layer.10": 1384.9526, "encoder_q-layer.11": 2851.738, "encoder_q-layer.2": 1197.975, "encoder_q-layer.3": 1254.2638, "encoder_q-layer.4": 1359.594, "encoder_q-layer.5": 1460.3296, "encoder_q-layer.6": 1494.0277, "encoder_q-layer.7": 1553.9442, "encoder_q-layer.8": 1647.1863, "encoder_q-layer.9": 1275.2777, "epoch": 0.33, "inbatch_neg_score": 0.2232, "inbatch_pos_score": 0.8535, "learning_rate": 3.677777777777778e-05, "loss": 3.3443, "norm_diff": 0.0356, "norm_loss": 0.0, "num_token_doc": 66.4914, "num_token_overlap": 15.7949, "num_token_query": 42.2266, "num_token_union": 68.2886, "num_word_context": 201.7585, "num_word_doc": 49.6201, "num_word_query": 31.8959, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2287.7856, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2218, "query_norm": 1.366, "queue_k_norm": 1.4032, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2266, "sent_len_1": 66.4914, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7875, "stdk": 0.0475, "stdq": 0.0443, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.338, "doc_norm": 1.4067, "encoder_q-embeddings": 1179.2708, "encoder_q-layer.0": 831.6924, "encoder_q-layer.1": 875.3856, "encoder_q-layer.10": 686.9547, "encoder_q-layer.11": 1466.6776, "encoder_q-layer.2": 951.3531, "encoder_q-layer.3": 1007.0913, "encoder_q-layer.4": 997.4813, "encoder_q-layer.5": 906.6019, "encoder_q-layer.6": 846.6407, "encoder_q-layer.7": 809.0796, "encoder_q-layer.8": 774.7978, "encoder_q-layer.9": 667.4517, "epoch": 0.33, "inbatch_neg_score": 0.2238, "inbatch_pos_score": 0.8608, "learning_rate": 3.672222222222222e-05, "loss": 3.338, "norm_diff": 0.0375, "norm_loss": 0.0, "num_token_doc": 66.8995, "num_token_overlap": 15.8967, "num_token_query": 42.4429, "num_token_union": 68.5352, "num_word_context": 201.9931, "num_word_doc": 49.9175, "num_word_query": 32.0675, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1449.2609, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2244, "query_norm": 1.3692, "queue_k_norm": 1.4041, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4429, "sent_len_1": 66.8995, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.8688, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3387, "doc_norm": 1.4092, "encoder_q-embeddings": 971.3205, "encoder_q-layer.0": 670.6382, "encoder_q-layer.1": 732.3457, "encoder_q-layer.10": 625.3802, "encoder_q-layer.11": 1451.6616, "encoder_q-layer.2": 844.6171, "encoder_q-layer.3": 917.2233, "encoder_q-layer.4": 969.0407, "encoder_q-layer.5": 980.3633, "encoder_q-layer.6": 1099.83, "encoder_q-layer.7": 941.035, "encoder_q-layer.8": 822.2943, "encoder_q-layer.9": 672.2606, "epoch": 0.33, "inbatch_neg_score": 0.218, "inbatch_pos_score": 0.8535, "learning_rate": 3.6666666666666666e-05, "loss": 3.3387, "norm_diff": 0.0402, "norm_loss": 0.0, "num_token_doc": 66.8521, "num_token_overlap": 15.8185, "num_token_query": 42.3081, "num_token_union": 68.578, "num_word_context": 202.1515, "num_word_doc": 49.9281, "num_word_query": 31.9744, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1362.635, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2179, "query_norm": 1.369, "queue_k_norm": 1.4056, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3081, "sent_len_1": 66.8521, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8887, "stdk": 0.0478, "stdq": 0.0444, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 51.4648, "active_queue_size": 16384.0, "cl_loss": 3.3321, "doc_norm": 1.4046, "encoder_q-embeddings": 895.9243, "encoder_q-layer.0": 652.1639, "encoder_q-layer.1": 702.0678, "encoder_q-layer.10": 317.9401, "encoder_q-layer.11": 728.0075, "encoder_q-layer.2": 850.6525, "encoder_q-layer.3": 937.9747, "encoder_q-layer.4": 1038.7625, "encoder_q-layer.5": 903.7196, "encoder_q-layer.6": 1051.3959, "encoder_q-layer.7": 1425.7079, "encoder_q-layer.8": 1846.3817, "encoder_q-layer.9": 871.0118, "epoch": 0.33, "inbatch_neg_score": 0.2235, "inbatch_pos_score": 0.8428, "learning_rate": 3.6611111111111115e-05, "loss": 3.3321, "norm_diff": 0.0457, "norm_loss": 0.0, "num_token_doc": 66.8582, "num_token_overlap": 15.7777, "num_token_query": 42.2471, "num_token_union": 68.4792, "num_word_context": 201.9925, "num_word_doc": 49.8338, "num_word_query": 31.9267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1602.5776, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2227, "query_norm": 1.3589, "queue_k_norm": 1.4048, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2471, "sent_len_1": 66.8582, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.9313, "stdk": 0.0476, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3399, "doc_norm": 1.4126, "encoder_q-embeddings": 305.6936, "encoder_q-layer.0": 202.5382, "encoder_q-layer.1": 221.1797, "encoder_q-layer.10": 293.6318, "encoder_q-layer.11": 720.9984, "encoder_q-layer.2": 233.3285, "encoder_q-layer.3": 241.2827, "encoder_q-layer.4": 253.7131, "encoder_q-layer.5": 249.133, "encoder_q-layer.6": 279.9501, "encoder_q-layer.7": 310.3665, "encoder_q-layer.8": 332.7301, "encoder_q-layer.9": 290.5461, "epoch": 0.33, "inbatch_neg_score": 0.2217, "inbatch_pos_score": 0.8877, "learning_rate": 3.655555555555556e-05, "loss": 3.3399, "norm_diff": 0.0464, "norm_loss": 0.0, "num_token_doc": 66.7238, "num_token_overlap": 15.7985, "num_token_query": 42.1519, "num_token_union": 68.3866, "num_word_context": 202.1884, "num_word_doc": 49.7898, "num_word_query": 31.8448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 503.1372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2222, "query_norm": 1.3662, "queue_k_norm": 1.4067, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1519, "sent_len_1": 66.7238, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9125, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3439, "doc_norm": 1.4071, "encoder_q-embeddings": 351.2868, "encoder_q-layer.0": 226.3593, "encoder_q-layer.1": 227.7308, "encoder_q-layer.10": 313.2173, "encoder_q-layer.11": 737.7402, "encoder_q-layer.2": 252.5191, "encoder_q-layer.3": 256.3239, "encoder_q-layer.4": 263.2179, "encoder_q-layer.5": 261.2528, "encoder_q-layer.6": 295.9006, "encoder_q-layer.7": 306.6815, "encoder_q-layer.8": 353.0641, "encoder_q-layer.9": 323.5527, "epoch": 0.33, "inbatch_neg_score": 0.2259, "inbatch_pos_score": 0.8726, "learning_rate": 3.65e-05, "loss": 3.3439, "norm_diff": 0.0426, "norm_loss": 0.0, "num_token_doc": 66.9526, "num_token_overlap": 15.8174, "num_token_query": 42.1659, "num_token_union": 68.482, "num_word_context": 202.0099, "num_word_doc": 49.9773, "num_word_query": 31.8463, "postclip_grad_norm": 1.0, "preclip_grad_norm": 523.7181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2271, "query_norm": 1.3645, "queue_k_norm": 1.4049, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1659, "sent_len_1": 66.9526, "sent_len_max_0": 127.9988, "sent_len_max_1": 186.9775, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.3405, "doc_norm": 1.4028, "encoder_q-embeddings": 344.7285, "encoder_q-layer.0": 231.5483, "encoder_q-layer.1": 247.7197, "encoder_q-layer.10": 301.5593, "encoder_q-layer.11": 703.6351, "encoder_q-layer.2": 275.0885, "encoder_q-layer.3": 275.0323, "encoder_q-layer.4": 279.9879, "encoder_q-layer.5": 282.547, "encoder_q-layer.6": 285.7108, "encoder_q-layer.7": 302.891, "encoder_q-layer.8": 332.2612, "encoder_q-layer.9": 308.3002, "epoch": 0.34, "inbatch_neg_score": 0.214, "inbatch_pos_score": 0.8828, "learning_rate": 3.644444444444445e-05, "loss": 3.3405, "norm_diff": 0.0368, "norm_loss": 0.0, "num_token_doc": 66.7994, "num_token_overlap": 15.7948, "num_token_query": 42.3114, "num_token_union": 68.4781, "num_word_context": 202.2951, "num_word_doc": 49.804, "num_word_query": 31.9311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 508.7411, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2131, "query_norm": 1.3661, "queue_k_norm": 1.4036, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3114, "sent_len_1": 66.7994, "sent_len_max_0": 128.0, "sent_len_max_1": 190.97, "stdk": 0.0475, "stdq": 0.0449, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.3165, "doc_norm": 1.4093, "encoder_q-embeddings": 563.6469, "encoder_q-layer.0": 364.1926, "encoder_q-layer.1": 403.7115, "encoder_q-layer.10": 315.6127, "encoder_q-layer.11": 673.8943, "encoder_q-layer.2": 461.9946, "encoder_q-layer.3": 467.4723, "encoder_q-layer.4": 436.7421, "encoder_q-layer.5": 409.2778, "encoder_q-layer.6": 416.2187, "encoder_q-layer.7": 418.2927, "encoder_q-layer.8": 430.0638, "encoder_q-layer.9": 337.2621, "epoch": 0.34, "inbatch_neg_score": 0.2131, "inbatch_pos_score": 0.877, "learning_rate": 3.638888888888889e-05, "loss": 3.3165, "norm_diff": 0.0473, "norm_loss": 0.0, "num_token_doc": 66.8529, "num_token_overlap": 15.8572, "num_token_query": 42.3329, "num_token_union": 68.4945, "num_word_context": 202.4684, "num_word_doc": 49.8689, "num_word_query": 31.9966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 686.3687, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2134, "query_norm": 1.3621, "queue_k_norm": 1.4056, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3329, "sent_len_1": 66.8529, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1612, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3437, "doc_norm": 1.4009, "encoder_q-embeddings": 481.9785, "encoder_q-layer.0": 305.115, "encoder_q-layer.1": 359.5788, "encoder_q-layer.10": 327.8467, "encoder_q-layer.11": 709.1396, "encoder_q-layer.2": 434.4658, "encoder_q-layer.3": 461.4316, "encoder_q-layer.4": 473.7267, "encoder_q-layer.5": 472.2899, "encoder_q-layer.6": 507.2126, "encoder_q-layer.7": 453.8097, "encoder_q-layer.8": 498.5135, "encoder_q-layer.9": 386.2684, "epoch": 0.34, "inbatch_neg_score": 0.2215, "inbatch_pos_score": 0.8628, "learning_rate": 3.633333333333333e-05, "loss": 3.3437, "norm_diff": 0.0264, "norm_loss": 0.0, "num_token_doc": 67.0228, "num_token_overlap": 15.8682, "num_token_query": 42.5336, "num_token_union": 68.7409, "num_word_context": 202.5548, "num_word_doc": 49.9745, "num_word_query": 32.0989, "postclip_grad_norm": 1.0, "preclip_grad_norm": 690.0922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2202, "query_norm": 1.3745, "queue_k_norm": 1.405, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5336, "sent_len_1": 67.0228, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.5387, "stdk": 0.0475, "stdq": 0.045, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.3496, "doc_norm": 1.4042, "encoder_q-embeddings": 288.6665, "encoder_q-layer.0": 209.188, "encoder_q-layer.1": 231.0145, "encoder_q-layer.10": 164.6402, "encoder_q-layer.11": 357.8489, "encoder_q-layer.2": 285.8638, "encoder_q-layer.3": 307.7919, "encoder_q-layer.4": 316.2524, "encoder_q-layer.5": 257.0905, "encoder_q-layer.6": 231.9819, "encoder_q-layer.7": 214.2471, "encoder_q-layer.8": 216.9154, "encoder_q-layer.9": 167.0097, "epoch": 0.34, "inbatch_neg_score": 0.2158, "inbatch_pos_score": 0.8574, "learning_rate": 3.6277777777777776e-05, "loss": 3.3496, "norm_diff": 0.021, "norm_loss": 0.0, "num_token_doc": 66.7267, "num_token_overlap": 15.7394, "num_token_query": 42.1781, "num_token_union": 68.3953, "num_word_context": 201.8215, "num_word_doc": 49.815, "num_word_query": 31.8386, "postclip_grad_norm": 1.0, "preclip_grad_norm": 384.7814, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2161, "query_norm": 1.3832, "queue_k_norm": 1.4033, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1781, "sent_len_1": 66.7267, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3963, "stdk": 0.0477, "stdq": 0.0453, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.3412, "doc_norm": 1.4055, "encoder_q-embeddings": 214.9543, "encoder_q-layer.0": 138.517, "encoder_q-layer.1": 152.0216, "encoder_q-layer.10": 163.9026, "encoder_q-layer.11": 362.2389, "encoder_q-layer.2": 159.2591, "encoder_q-layer.3": 159.2384, "encoder_q-layer.4": 164.9948, "encoder_q-layer.5": 155.816, "encoder_q-layer.6": 170.2124, "encoder_q-layer.7": 190.3921, "encoder_q-layer.8": 198.4428, "encoder_q-layer.9": 161.4037, "epoch": 0.34, "inbatch_neg_score": 0.2167, "inbatch_pos_score": 0.8438, "learning_rate": 3.6222222222222225e-05, "loss": 3.3412, "norm_diff": 0.048, "norm_loss": 0.0, "num_token_doc": 66.598, "num_token_overlap": 15.8413, "num_token_query": 42.3701, "num_token_union": 68.3936, "num_word_context": 202.4363, "num_word_doc": 49.7313, "num_word_query": 32.0298, "postclip_grad_norm": 1.0, "preclip_grad_norm": 292.0134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2168, "query_norm": 1.3575, "queue_k_norm": 1.4022, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3701, "sent_len_1": 66.598, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3388, "stdk": 0.0477, "stdq": 0.0441, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3265, "doc_norm": 1.3993, "encoder_q-embeddings": 553.5579, "encoder_q-layer.0": 397.2405, "encoder_q-layer.1": 411.8266, "encoder_q-layer.10": 152.907, "encoder_q-layer.11": 355.2731, "encoder_q-layer.2": 462.0502, "encoder_q-layer.3": 466.7848, "encoder_q-layer.4": 476.4034, "encoder_q-layer.5": 391.0138, "encoder_q-layer.6": 419.2639, "encoder_q-layer.7": 381.1936, "encoder_q-layer.8": 319.4559, "encoder_q-layer.9": 169.6128, "epoch": 0.34, "inbatch_neg_score": 0.2133, "inbatch_pos_score": 0.8706, "learning_rate": 3.6166666666666674e-05, "loss": 3.3265, "norm_diff": 0.015, "norm_loss": 0.0, "num_token_doc": 66.861, "num_token_overlap": 15.844, "num_token_query": 42.301, "num_token_union": 68.538, "num_word_context": 202.369, "num_word_doc": 49.8559, "num_word_query": 31.9429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 602.1378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2129, "query_norm": 1.3898, "queue_k_norm": 1.4042, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.301, "sent_len_1": 66.861, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0238, "stdk": 0.0475, "stdq": 0.0456, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3162, "doc_norm": 1.4013, "encoder_q-embeddings": 180.3447, "encoder_q-layer.0": 126.3724, "encoder_q-layer.1": 131.9737, "encoder_q-layer.10": 160.4883, "encoder_q-layer.11": 384.3951, "encoder_q-layer.2": 163.0012, "encoder_q-layer.3": 172.1871, "encoder_q-layer.4": 178.725, "encoder_q-layer.5": 179.4176, "encoder_q-layer.6": 168.8835, "encoder_q-layer.7": 172.7864, "encoder_q-layer.8": 173.767, "encoder_q-layer.9": 157.1089, "epoch": 0.34, "inbatch_neg_score": 0.2124, "inbatch_pos_score": 0.8413, "learning_rate": 3.611111111111111e-05, "loss": 3.3162, "norm_diff": 0.0381, "norm_loss": 0.0, "num_token_doc": 66.8843, "num_token_overlap": 15.884, "num_token_query": 42.4497, "num_token_union": 68.5477, "num_word_context": 202.3761, "num_word_doc": 49.9502, "num_word_query": 32.0823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 284.1673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2112, "query_norm": 1.3632, "queue_k_norm": 1.405, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4497, "sent_len_1": 66.8843, "sent_len_max_0": 127.995, "sent_len_max_1": 188.4663, "stdk": 0.0476, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3297, "doc_norm": 1.4056, "encoder_q-embeddings": 188.4614, "encoder_q-layer.0": 127.5967, "encoder_q-layer.1": 138.5938, "encoder_q-layer.10": 168.3396, "encoder_q-layer.11": 355.8739, "encoder_q-layer.2": 153.2797, "encoder_q-layer.3": 153.8756, "encoder_q-layer.4": 155.3853, "encoder_q-layer.5": 159.4819, "encoder_q-layer.6": 170.5543, "encoder_q-layer.7": 184.0351, "encoder_q-layer.8": 191.8844, "encoder_q-layer.9": 155.7621, "epoch": 0.34, "inbatch_neg_score": 0.2098, "inbatch_pos_score": 0.8511, "learning_rate": 3.605555555555556e-05, "loss": 3.3297, "norm_diff": 0.0335, "norm_loss": 0.0, "num_token_doc": 66.6904, "num_token_overlap": 15.7557, "num_token_query": 42.1288, "num_token_union": 68.3203, "num_word_context": 201.9911, "num_word_doc": 49.7576, "num_word_query": 31.8275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 281.0312, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2103, "query_norm": 1.3721, "queue_k_norm": 1.4033, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1288, "sent_len_1": 66.6904, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6188, "stdk": 0.0477, "stdq": 0.045, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3251, "doc_norm": 1.4144, "encoder_q-embeddings": 140.5896, "encoder_q-layer.0": 88.9436, "encoder_q-layer.1": 91.5089, "encoder_q-layer.10": 166.5414, "encoder_q-layer.11": 362.5134, "encoder_q-layer.2": 100.1565, "encoder_q-layer.3": 101.9874, "encoder_q-layer.4": 110.4386, "encoder_q-layer.5": 113.5461, "encoder_q-layer.6": 131.7161, "encoder_q-layer.7": 146.2001, "encoder_q-layer.8": 171.8705, "encoder_q-layer.9": 156.6731, "epoch": 0.34, "inbatch_neg_score": 0.2038, "inbatch_pos_score": 0.855, "learning_rate": 3.6e-05, "loss": 3.3251, "norm_diff": 0.0731, "norm_loss": 0.0, "num_token_doc": 66.8152, "num_token_overlap": 15.8264, "num_token_query": 42.1798, "num_token_union": 68.4474, "num_word_context": 202.2939, "num_word_doc": 49.8184, "num_word_query": 31.8415, "postclip_grad_norm": 1.0, "preclip_grad_norm": 238.9251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2041, "query_norm": 1.3414, "queue_k_norm": 1.4027, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1798, "sent_len_1": 66.8152, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8462, "stdk": 0.0481, "stdq": 0.0438, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.3252, "doc_norm": 1.4021, "encoder_q-embeddings": 161.1821, "encoder_q-layer.0": 111.9779, "encoder_q-layer.1": 114.0667, "encoder_q-layer.10": 163.3755, "encoder_q-layer.11": 381.8171, "encoder_q-layer.2": 130.2025, "encoder_q-layer.3": 129.349, "encoder_q-layer.4": 135.3248, "encoder_q-layer.5": 135.8928, "encoder_q-layer.6": 143.3334, "encoder_q-layer.7": 153.3512, "encoder_q-layer.8": 172.8725, "encoder_q-layer.9": 157.4523, "epoch": 0.34, "inbatch_neg_score": 0.2055, "inbatch_pos_score": 0.8472, "learning_rate": 3.594444444444445e-05, "loss": 3.3252, "norm_diff": 0.041, "norm_loss": 0.0, "num_token_doc": 66.9551, "num_token_overlap": 15.8788, "num_token_query": 42.3214, "num_token_union": 68.5728, "num_word_context": 202.2788, "num_word_doc": 49.9921, "num_word_query": 31.9533, "postclip_grad_norm": 1.0, "preclip_grad_norm": 264.6396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2054, "query_norm": 1.3611, "queue_k_norm": 1.4035, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3214, "sent_len_1": 66.9551, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5387, "stdk": 0.0476, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.3523, "doc_norm": 1.3988, "encoder_q-embeddings": 140.0533, "encoder_q-layer.0": 88.9073, "encoder_q-layer.1": 93.1676, "encoder_q-layer.10": 152.5877, "encoder_q-layer.11": 346.625, "encoder_q-layer.2": 103.9373, "encoder_q-layer.3": 106.4595, "encoder_q-layer.4": 107.5405, "encoder_q-layer.5": 114.3479, "encoder_q-layer.6": 128.4685, "encoder_q-layer.7": 135.1625, "encoder_q-layer.8": 153.7488, "encoder_q-layer.9": 144.1794, "epoch": 0.35, "inbatch_neg_score": 0.1943, "inbatch_pos_score": 0.8398, "learning_rate": 3.5888888888888886e-05, "loss": 3.3523, "norm_diff": 0.0445, "norm_loss": 0.0, "num_token_doc": 66.637, "num_token_overlap": 15.7972, "num_token_query": 42.3527, "num_token_union": 68.5082, "num_word_context": 202.3193, "num_word_doc": 49.7381, "num_word_query": 31.9929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 227.4503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1947, "query_norm": 1.3543, "queue_k_norm": 1.4005, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3527, "sent_len_1": 66.637, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0513, "stdk": 0.0476, "stdq": 0.0445, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.3215, "doc_norm": 1.4012, "encoder_q-embeddings": 157.129, "encoder_q-layer.0": 100.0863, "encoder_q-layer.1": 103.0574, "encoder_q-layer.10": 155.8078, "encoder_q-layer.11": 345.9959, "encoder_q-layer.2": 111.6016, "encoder_q-layer.3": 110.7022, "encoder_q-layer.4": 119.6944, "encoder_q-layer.5": 123.978, "encoder_q-layer.6": 135.9595, "encoder_q-layer.7": 147.7392, "encoder_q-layer.8": 169.598, "encoder_q-layer.9": 145.8224, "epoch": 0.35, "inbatch_neg_score": 0.1955, "inbatch_pos_score": 0.8418, "learning_rate": 3.5833333333333335e-05, "loss": 3.3215, "norm_diff": 0.0413, "norm_loss": 0.0, "num_token_doc": 66.7653, "num_token_overlap": 15.8406, "num_token_query": 42.4554, "num_token_union": 68.5264, "num_word_context": 202.093, "num_word_doc": 49.7682, "num_word_query": 32.0596, "postclip_grad_norm": 1.0, "preclip_grad_norm": 245.4824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1948, "query_norm": 1.3599, "queue_k_norm": 1.4012, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4554, "sent_len_1": 66.7653, "sent_len_max_0": 128.0, "sent_len_max_1": 191.035, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.3338, "doc_norm": 1.4008, "encoder_q-embeddings": 164.3398, "encoder_q-layer.0": 107.9542, "encoder_q-layer.1": 116.0625, "encoder_q-layer.10": 165.3928, "encoder_q-layer.11": 349.5479, "encoder_q-layer.2": 120.3436, "encoder_q-layer.3": 121.6612, "encoder_q-layer.4": 124.8135, "encoder_q-layer.5": 122.1715, "encoder_q-layer.6": 138.3392, "encoder_q-layer.7": 153.0435, "encoder_q-layer.8": 171.3411, "encoder_q-layer.9": 158.0129, "epoch": 0.35, "inbatch_neg_score": 0.2025, "inbatch_pos_score": 0.8584, "learning_rate": 3.577777777777778e-05, "loss": 3.3338, "norm_diff": 0.0149, "norm_loss": 0.0, "num_token_doc": 66.6968, "num_token_overlap": 15.7626, "num_token_query": 42.2845, "num_token_union": 68.4515, "num_word_context": 202.202, "num_word_doc": 49.7627, "num_word_query": 31.9392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 249.5594, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.202, "query_norm": 1.3859, "queue_k_norm": 1.4006, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2845, "sent_len_1": 66.6968, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7312, "stdk": 0.0477, "stdq": 0.0453, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.3116, "doc_norm": 1.4024, "encoder_q-embeddings": 144.1725, "encoder_q-layer.0": 93.1185, "encoder_q-layer.1": 103.6302, "encoder_q-layer.10": 175.385, "encoder_q-layer.11": 361.8972, "encoder_q-layer.2": 111.2672, "encoder_q-layer.3": 115.5337, "encoder_q-layer.4": 123.6777, "encoder_q-layer.5": 123.2259, "encoder_q-layer.6": 139.1973, "encoder_q-layer.7": 149.4718, "encoder_q-layer.8": 171.7709, "encoder_q-layer.9": 164.7733, "epoch": 0.35, "inbatch_neg_score": 0.1992, "inbatch_pos_score": 0.8311, "learning_rate": 3.5722222222222226e-05, "loss": 3.3116, "norm_diff": 0.0729, "norm_loss": 0.0, "num_token_doc": 66.9135, "num_token_overlap": 15.8031, "num_token_query": 42.3308, "num_token_union": 68.5861, "num_word_context": 202.4558, "num_word_doc": 49.924, "num_word_query": 31.9811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 243.6319, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1987, "query_norm": 1.3295, "queue_k_norm": 1.4007, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3308, "sent_len_1": 66.9135, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5775, "stdk": 0.0478, "stdq": 0.0432, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3067, "doc_norm": 1.4003, "encoder_q-embeddings": 221.8558, "encoder_q-layer.0": 141.4795, "encoder_q-layer.1": 162.5562, "encoder_q-layer.10": 178.9793, "encoder_q-layer.11": 370.4702, "encoder_q-layer.2": 179.3462, "encoder_q-layer.3": 186.2275, "encoder_q-layer.4": 194.8814, "encoder_q-layer.5": 203.6925, "encoder_q-layer.6": 195.7163, "encoder_q-layer.7": 203.0691, "encoder_q-layer.8": 199.9914, "encoder_q-layer.9": 164.1623, "epoch": 0.35, "inbatch_neg_score": 0.2006, "inbatch_pos_score": 0.8423, "learning_rate": 3.566666666666667e-05, "loss": 3.3067, "norm_diff": 0.0312, "norm_loss": 0.0, "num_token_doc": 66.9411, "num_token_overlap": 15.8681, "num_token_query": 42.2633, "num_token_union": 68.5475, "num_word_context": 202.288, "num_word_doc": 49.9773, "num_word_query": 31.9235, "postclip_grad_norm": 1.0, "preclip_grad_norm": 308.8478, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1998, "query_norm": 1.3691, "queue_k_norm": 1.4002, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2633, "sent_len_1": 66.9411, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4425, "stdk": 0.0477, "stdq": 0.0449, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.3351, "doc_norm": 1.3975, "encoder_q-embeddings": 285.689, "encoder_q-layer.0": 205.8642, "encoder_q-layer.1": 218.3437, "encoder_q-layer.10": 161.9368, "encoder_q-layer.11": 346.571, "encoder_q-layer.2": 218.3529, "encoder_q-layer.3": 213.4212, "encoder_q-layer.4": 223.6049, "encoder_q-layer.5": 235.8908, "encoder_q-layer.6": 228.6831, "encoder_q-layer.7": 208.5601, "encoder_q-layer.8": 213.6254, "encoder_q-layer.9": 156.7324, "epoch": 0.35, "inbatch_neg_score": 0.2116, "inbatch_pos_score": 0.8584, "learning_rate": 3.561111111111111e-05, "loss": 3.3351, "norm_diff": 0.0194, "norm_loss": 0.0, "num_token_doc": 66.8208, "num_token_overlap": 15.8328, "num_token_query": 42.3369, "num_token_union": 68.4747, "num_word_context": 202.4654, "num_word_doc": 49.858, "num_word_query": 31.9731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 345.8289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2113, "query_norm": 1.3807, "queue_k_norm": 1.4, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3369, "sent_len_1": 66.8208, "sent_len_max_0": 128.0, "sent_len_max_1": 188.18, "stdk": 0.0476, "stdq": 0.0451, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.3297, "doc_norm": 1.4009, "encoder_q-embeddings": 188.2284, "encoder_q-layer.0": 123.9515, "encoder_q-layer.1": 135.4128, "encoder_q-layer.10": 153.6676, "encoder_q-layer.11": 352.8634, "encoder_q-layer.2": 149.3063, "encoder_q-layer.3": 160.3255, "encoder_q-layer.4": 174.2133, "encoder_q-layer.5": 170.2328, "encoder_q-layer.6": 174.3131, "encoder_q-layer.7": 172.5418, "encoder_q-layer.8": 184.772, "encoder_q-layer.9": 156.0242, "epoch": 0.35, "inbatch_neg_score": 0.2077, "inbatch_pos_score": 0.8535, "learning_rate": 3.555555555555556e-05, "loss": 3.3297, "norm_diff": 0.0302, "norm_loss": 0.0, "num_token_doc": 66.7993, "num_token_overlap": 15.863, "num_token_query": 42.4527, "num_token_union": 68.531, "num_word_context": 202.3206, "num_word_doc": 49.8628, "num_word_query": 32.0781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 278.957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2078, "query_norm": 1.3708, "queue_k_norm": 1.4001, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4527, "sent_len_1": 66.7993, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6037, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.3067, "doc_norm": 1.4056, "encoder_q-embeddings": 174.356, "encoder_q-layer.0": 114.1401, "encoder_q-layer.1": 117.4745, "encoder_q-layer.10": 178.2247, "encoder_q-layer.11": 371.5211, "encoder_q-layer.2": 124.5986, "encoder_q-layer.3": 128.0547, "encoder_q-layer.4": 135.4586, "encoder_q-layer.5": 141.9347, "encoder_q-layer.6": 160.6619, "encoder_q-layer.7": 182.8056, "encoder_q-layer.8": 217.2771, "encoder_q-layer.9": 168.9632, "epoch": 0.35, "inbatch_neg_score": 0.206, "inbatch_pos_score": 0.8599, "learning_rate": 3.55e-05, "loss": 3.3067, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.9551, "num_token_overlap": 15.9005, "num_token_query": 42.6185, "num_token_union": 68.7197, "num_word_context": 202.7818, "num_word_doc": 50.0078, "num_word_query": 32.2295, "postclip_grad_norm": 1.0, "preclip_grad_norm": 267.2832, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2074, "query_norm": 1.3499, "queue_k_norm": 1.4012, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.6185, "sent_len_1": 66.9551, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0563, "stdk": 0.0479, "stdq": 0.0439, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 50.8789, "active_queue_size": 16384.0, "cl_loss": 3.3376, "doc_norm": 1.4023, "encoder_q-embeddings": 191.3983, "encoder_q-layer.0": 122.6931, "encoder_q-layer.1": 130.6865, "encoder_q-layer.10": 156.9092, "encoder_q-layer.11": 346.682, "encoder_q-layer.2": 148.8596, "encoder_q-layer.3": 151.3068, "encoder_q-layer.4": 161.782, "encoder_q-layer.5": 164.4252, "encoder_q-layer.6": 188.6359, "encoder_q-layer.7": 189.0384, "encoder_q-layer.8": 193.7994, "encoder_q-layer.9": 153.583, "epoch": 0.35, "inbatch_neg_score": 0.2124, "inbatch_pos_score": 0.8525, "learning_rate": 3.5444444444444445e-05, "loss": 3.3376, "norm_diff": 0.0381, "norm_loss": 0.0, "num_token_doc": 66.7202, "num_token_overlap": 15.7863, "num_token_query": 42.3078, "num_token_union": 68.4931, "num_word_context": 202.0959, "num_word_doc": 49.8056, "num_word_query": 31.9391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 277.2718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2142, "query_norm": 1.3642, "queue_k_norm": 1.4009, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3078, "sent_len_1": 66.7202, "sent_len_max_0": 127.98, "sent_len_max_1": 188.225, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3155, "doc_norm": 1.3976, "encoder_q-embeddings": 209.3263, "encoder_q-layer.0": 142.0747, "encoder_q-layer.1": 159.8783, "encoder_q-layer.10": 164.0569, "encoder_q-layer.11": 357.8183, "encoder_q-layer.2": 177.8088, "encoder_q-layer.3": 193.1596, "encoder_q-layer.4": 213.0959, "encoder_q-layer.5": 211.4298, "encoder_q-layer.6": 228.0193, "encoder_q-layer.7": 241.8938, "encoder_q-layer.8": 246.5823, "encoder_q-layer.9": 164.9684, "epoch": 0.35, "inbatch_neg_score": 0.2181, "inbatch_pos_score": 0.8838, "learning_rate": 3.538888888888889e-05, "loss": 3.3155, "norm_diff": 0.0266, "norm_loss": 0.0, "num_token_doc": 66.7777, "num_token_overlap": 15.8395, "num_token_query": 42.3187, "num_token_union": 68.4966, "num_word_context": 202.2628, "num_word_doc": 49.8301, "num_word_query": 31.966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 319.0744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2203, "query_norm": 1.3711, "queue_k_norm": 1.4004, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3187, "sent_len_1": 66.7777, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.8212, "stdk": 0.0477, "stdq": 0.0447, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.3062, "doc_norm": 1.4058, "encoder_q-embeddings": 255.5659, "encoder_q-layer.0": 175.163, "encoder_q-layer.1": 208.3114, "encoder_q-layer.10": 143.6473, "encoder_q-layer.11": 334.8573, "encoder_q-layer.2": 253.9642, "encoder_q-layer.3": 272.599, "encoder_q-layer.4": 314.4591, "encoder_q-layer.5": 333.5977, "encoder_q-layer.6": 343.3011, "encoder_q-layer.7": 252.1395, "encoder_q-layer.8": 200.6103, "encoder_q-layer.9": 147.0661, "epoch": 0.36, "inbatch_neg_score": 0.2324, "inbatch_pos_score": 0.8936, "learning_rate": 3.5333333333333336e-05, "loss": 3.3062, "norm_diff": 0.0315, "norm_loss": 0.0, "num_token_doc": 66.8501, "num_token_overlap": 15.83, "num_token_query": 42.3253, "num_token_union": 68.5394, "num_word_context": 202.4315, "num_word_doc": 49.887, "num_word_query": 31.9737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 383.2188, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2329, "query_norm": 1.3743, "queue_k_norm": 1.4026, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3253, "sent_len_1": 66.8501, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.8875, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.3169, "doc_norm": 1.3987, "encoder_q-embeddings": 166.386, "encoder_q-layer.0": 109.5372, "encoder_q-layer.1": 123.5952, "encoder_q-layer.10": 143.8144, "encoder_q-layer.11": 330.0577, "encoder_q-layer.2": 134.5603, "encoder_q-layer.3": 147.0993, "encoder_q-layer.4": 146.3694, "encoder_q-layer.5": 149.1743, "encoder_q-layer.6": 156.4661, "encoder_q-layer.7": 160.3241, "encoder_q-layer.8": 169.5061, "encoder_q-layer.9": 152.3902, "epoch": 0.36, "inbatch_neg_score": 0.2252, "inbatch_pos_score": 0.8638, "learning_rate": 3.527777777777778e-05, "loss": 3.3169, "norm_diff": 0.0264, "norm_loss": 0.0, "num_token_doc": 66.8393, "num_token_overlap": 15.8231, "num_token_query": 42.3359, "num_token_union": 68.5459, "num_word_context": 202.4313, "num_word_doc": 49.8801, "num_word_query": 31.9905, "postclip_grad_norm": 1.0, "preclip_grad_norm": 253.8858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2241, "query_norm": 1.3723, "queue_k_norm": 1.404, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3359, "sent_len_1": 66.8393, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2337, "stdk": 0.0476, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.3074, "doc_norm": 1.4023, "encoder_q-embeddings": 141.014, "encoder_q-layer.0": 88.818, "encoder_q-layer.1": 91.6228, "encoder_q-layer.10": 169.1192, "encoder_q-layer.11": 348.4379, "encoder_q-layer.2": 99.4881, "encoder_q-layer.3": 102.1595, "encoder_q-layer.4": 110.9538, "encoder_q-layer.5": 111.8346, "encoder_q-layer.6": 136.4991, "encoder_q-layer.7": 147.4771, "encoder_q-layer.8": 167.9978, "encoder_q-layer.9": 150.7133, "epoch": 0.36, "inbatch_neg_score": 0.2172, "inbatch_pos_score": 0.8584, "learning_rate": 3.522222222222222e-05, "loss": 3.3074, "norm_diff": 0.0436, "norm_loss": 0.0, "num_token_doc": 66.7123, "num_token_overlap": 15.7823, "num_token_query": 42.2599, "num_token_union": 68.4543, "num_word_context": 202.2791, "num_word_doc": 49.8274, "num_word_query": 31.9187, "postclip_grad_norm": 1.0, "preclip_grad_norm": 238.7946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.217, "query_norm": 1.3587, "queue_k_norm": 1.4021, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2599, "sent_len_1": 66.7123, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1287, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.3125, "doc_norm": 1.4057, "encoder_q-embeddings": 288.4227, "encoder_q-layer.0": 181.7731, "encoder_q-layer.1": 190.0577, "encoder_q-layer.10": 316.8658, "encoder_q-layer.11": 722.1805, "encoder_q-layer.2": 219.1144, "encoder_q-layer.3": 231.2587, "encoder_q-layer.4": 242.0119, "encoder_q-layer.5": 241.3596, "encoder_q-layer.6": 272.1242, "encoder_q-layer.7": 317.1763, "encoder_q-layer.8": 358.7878, "encoder_q-layer.9": 319.5746, "epoch": 0.36, "inbatch_neg_score": 0.2223, "inbatch_pos_score": 0.8496, "learning_rate": 3.516666666666667e-05, "loss": 3.3125, "norm_diff": 0.039, "norm_loss": 0.0, "num_token_doc": 66.8537, "num_token_overlap": 15.8221, "num_token_query": 42.258, "num_token_union": 68.5142, "num_word_context": 202.5844, "num_word_doc": 49.8694, "num_word_query": 31.9176, "postclip_grad_norm": 1.0, "preclip_grad_norm": 497.8869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.223, "query_norm": 1.3667, "queue_k_norm": 1.4025, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.258, "sent_len_1": 66.8537, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.24, "stdk": 0.0479, "stdq": 0.0446, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 51.3672, "active_queue_size": 16384.0, "cl_loss": 3.2834, "doc_norm": 1.3959, "encoder_q-embeddings": 390.3103, "encoder_q-layer.0": 285.9443, "encoder_q-layer.1": 322.0056, "encoder_q-layer.10": 319.5765, "encoder_q-layer.11": 726.303, "encoder_q-layer.2": 379.1673, "encoder_q-layer.3": 401.2963, "encoder_q-layer.4": 394.4311, "encoder_q-layer.5": 395.327, "encoder_q-layer.6": 443.486, "encoder_q-layer.7": 436.4536, "encoder_q-layer.8": 397.7871, "encoder_q-layer.9": 304.1492, "epoch": 0.36, "inbatch_neg_score": 0.2256, "inbatch_pos_score": 0.8569, "learning_rate": 3.511111111111111e-05, "loss": 3.2834, "norm_diff": 0.0206, "norm_loss": 0.0, "num_token_doc": 66.7856, "num_token_overlap": 15.9095, "num_token_query": 42.4521, "num_token_union": 68.5236, "num_word_context": 202.3846, "num_word_doc": 49.89, "num_word_query": 32.1241, "postclip_grad_norm": 1.0, "preclip_grad_norm": 610.5115, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2253, "query_norm": 1.3772, "queue_k_norm": 1.4029, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4521, "sent_len_1": 66.7856, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.4988, "stdk": 0.0474, "stdq": 0.0448, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.3229, "doc_norm": 1.3945, "encoder_q-embeddings": 376.933, "encoder_q-layer.0": 263.6271, "encoder_q-layer.1": 292.8973, "encoder_q-layer.10": 286.0148, "encoder_q-layer.11": 691.9781, "encoder_q-layer.2": 311.7908, "encoder_q-layer.3": 326.9004, "encoder_q-layer.4": 325.2968, "encoder_q-layer.5": 323.8436, "encoder_q-layer.6": 323.5369, "encoder_q-layer.7": 332.9822, "encoder_q-layer.8": 340.7238, "encoder_q-layer.9": 300.979, "epoch": 0.36, "inbatch_neg_score": 0.229, "inbatch_pos_score": 0.8647, "learning_rate": 3.505555555555556e-05, "loss": 3.3229, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.836, "num_token_overlap": 15.7333, "num_token_query": 42.1406, "num_token_union": 68.4578, "num_word_context": 202.5694, "num_word_doc": 49.8509, "num_word_query": 31.828, "postclip_grad_norm": 1.0, "preclip_grad_norm": 546.2826, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2284, "query_norm": 1.3552, "queue_k_norm": 1.4042, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1406, "sent_len_1": 66.836, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6138, "stdk": 0.0474, "stdq": 0.044, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.3294, "doc_norm": 1.4035, "encoder_q-embeddings": 287.4733, "encoder_q-layer.0": 188.7918, "encoder_q-layer.1": 198.0476, "encoder_q-layer.10": 294.7332, "encoder_q-layer.11": 674.5162, "encoder_q-layer.2": 218.0404, "encoder_q-layer.3": 226.5336, "encoder_q-layer.4": 235.6312, "encoder_q-layer.5": 247.2152, "encoder_q-layer.6": 259.9816, "encoder_q-layer.7": 296.9776, "encoder_q-layer.8": 318.6623, "encoder_q-layer.9": 291.0114, "epoch": 0.36, "inbatch_neg_score": 0.2295, "inbatch_pos_score": 0.8789, "learning_rate": 3.5e-05, "loss": 3.3294, "norm_diff": 0.0186, "norm_loss": 0.0, "num_token_doc": 66.4493, "num_token_overlap": 15.7285, "num_token_query": 42.1165, "num_token_union": 68.2484, "num_word_context": 201.8488, "num_word_doc": 49.5652, "num_word_query": 31.8132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 469.3679, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.228, "query_norm": 1.3884, "queue_k_norm": 1.4033, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1165, "sent_len_1": 66.4493, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4112, "stdk": 0.0477, "stdq": 0.0452, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.3157, "doc_norm": 1.4011, "encoder_q-embeddings": 426.5246, "encoder_q-layer.0": 309.9511, "encoder_q-layer.1": 340.7784, "encoder_q-layer.10": 279.6655, "encoder_q-layer.11": 644.9713, "encoder_q-layer.2": 392.7417, "encoder_q-layer.3": 384.3816, "encoder_q-layer.4": 366.4255, "encoder_q-layer.5": 352.9389, "encoder_q-layer.6": 376.978, "encoder_q-layer.7": 399.889, "encoder_q-layer.8": 398.2278, "encoder_q-layer.9": 320.5413, "epoch": 0.36, "inbatch_neg_score": 0.2318, "inbatch_pos_score": 0.9038, "learning_rate": 3.4944444444444446e-05, "loss": 3.3157, "norm_diff": 0.0178, "norm_loss": 0.0, "num_token_doc": 66.8461, "num_token_overlap": 15.811, "num_token_query": 42.3322, "num_token_union": 68.521, "num_word_context": 202.2895, "num_word_doc": 49.8424, "num_word_query": 31.9715, "postclip_grad_norm": 1.0, "preclip_grad_norm": 595.5204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2324, "query_norm": 1.3854, "queue_k_norm": 1.4048, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3322, "sent_len_1": 66.8461, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7925, "stdk": 0.0476, "stdq": 0.045, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.3043, "doc_norm": 1.4036, "encoder_q-embeddings": 353.9804, "encoder_q-layer.0": 239.2881, "encoder_q-layer.1": 272.6238, "encoder_q-layer.10": 340.626, "encoder_q-layer.11": 736.2141, "encoder_q-layer.2": 314.3872, "encoder_q-layer.3": 329.2118, "encoder_q-layer.4": 352.1559, "encoder_q-layer.5": 353.8815, "encoder_q-layer.6": 349.0556, "encoder_q-layer.7": 357.5408, "encoder_q-layer.8": 376.005, "encoder_q-layer.9": 325.6597, "epoch": 0.36, "inbatch_neg_score": 0.231, "inbatch_pos_score": 0.8936, "learning_rate": 3.4888888888888895e-05, "loss": 3.3043, "norm_diff": 0.0124, "norm_loss": 0.0, "num_token_doc": 66.8786, "num_token_overlap": 15.8565, "num_token_query": 42.4129, "num_token_union": 68.5462, "num_word_context": 202.4474, "num_word_doc": 49.9245, "num_word_query": 32.0489, "postclip_grad_norm": 1.0, "preclip_grad_norm": 564.0593, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.23, "query_norm": 1.3916, "queue_k_norm": 1.4072, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4129, "sent_len_1": 66.8786, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.1387, "stdk": 0.0477, "stdq": 0.0452, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.295, "doc_norm": 1.4025, "encoder_q-embeddings": 931.2816, "encoder_q-layer.0": 655.485, "encoder_q-layer.1": 821.2739, "encoder_q-layer.10": 331.9372, "encoder_q-layer.11": 732.2772, "encoder_q-layer.2": 898.8343, "encoder_q-layer.3": 914.7857, "encoder_q-layer.4": 942.637, "encoder_q-layer.5": 934.6824, "encoder_q-layer.6": 798.8993, "encoder_q-layer.7": 728.6412, "encoder_q-layer.8": 599.2941, "encoder_q-layer.9": 414.1993, "epoch": 0.36, "inbatch_neg_score": 0.2339, "inbatch_pos_score": 0.9077, "learning_rate": 3.483333333333334e-05, "loss": 3.295, "norm_diff": 0.0156, "norm_loss": 0.0, "num_token_doc": 66.9194, "num_token_overlap": 15.8829, "num_token_query": 42.3376, "num_token_union": 68.5064, "num_word_context": 201.9839, "num_word_doc": 49.9616, "num_word_query": 32.0027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1139.9737, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2335, "query_norm": 1.3979, "queue_k_norm": 1.4061, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3376, "sent_len_1": 66.9194, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2562, "stdk": 0.0476, "stdq": 0.0456, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.3004, "doc_norm": 1.4037, "encoder_q-embeddings": 373.2416, "encoder_q-layer.0": 270.5224, "encoder_q-layer.1": 293.5213, "encoder_q-layer.10": 299.3855, "encoder_q-layer.11": 688.0672, "encoder_q-layer.2": 321.5227, "encoder_q-layer.3": 329.9633, "encoder_q-layer.4": 327.5812, "encoder_q-layer.5": 312.8896, "encoder_q-layer.6": 340.7272, "encoder_q-layer.7": 340.5986, "encoder_q-layer.8": 366.4724, "encoder_q-layer.9": 309.3606, "epoch": 0.37, "inbatch_neg_score": 0.2337, "inbatch_pos_score": 0.8711, "learning_rate": 3.477777777777778e-05, "loss": 3.3004, "norm_diff": 0.0237, "norm_loss": 0.0, "num_token_doc": 66.7991, "num_token_overlap": 15.848, "num_token_query": 42.2159, "num_token_union": 68.3937, "num_word_context": 202.0994, "num_word_doc": 49.8026, "num_word_query": 31.8494, "postclip_grad_norm": 1.0, "preclip_grad_norm": 553.2887, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2332, "query_norm": 1.3815, "queue_k_norm": 1.4049, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2159, "sent_len_1": 66.7991, "sent_len_max_0": 128.0, "sent_len_max_1": 193.6113, "stdk": 0.0476, "stdq": 0.0449, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3091, "doc_norm": 1.4054, "encoder_q-embeddings": 2251.3286, "encoder_q-layer.0": 1805.4059, "encoder_q-layer.1": 1795.5931, "encoder_q-layer.10": 325.662, "encoder_q-layer.11": 704.1617, "encoder_q-layer.2": 1965.8601, "encoder_q-layer.3": 2046.1646, "encoder_q-layer.4": 2248.1428, "encoder_q-layer.5": 2017.8949, "encoder_q-layer.6": 1931.2383, "encoder_q-layer.7": 1901.83, "encoder_q-layer.8": 1460.1609, "encoder_q-layer.9": 595.0225, "epoch": 0.37, "inbatch_neg_score": 0.2302, "inbatch_pos_score": 0.915, "learning_rate": 3.472222222222222e-05, "loss": 3.3091, "norm_diff": 0.0172, "norm_loss": 0.0, "num_token_doc": 66.6342, "num_token_overlap": 15.8192, "num_token_query": 42.2718, "num_token_union": 68.3441, "num_word_context": 202.469, "num_word_doc": 49.7472, "num_word_query": 31.9404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2672.2592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2302, "query_norm": 1.3882, "queue_k_norm": 1.4072, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2718, "sent_len_1": 66.6342, "sent_len_max_0": 127.99, "sent_len_max_1": 187.6238, "stdk": 0.0477, "stdq": 0.0455, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.3031, "doc_norm": 1.4083, "encoder_q-embeddings": 346.5603, "encoder_q-layer.0": 238.5076, "encoder_q-layer.1": 252.6785, "encoder_q-layer.10": 327.0583, "encoder_q-layer.11": 763.5307, "encoder_q-layer.2": 277.7104, "encoder_q-layer.3": 289.8954, "encoder_q-layer.4": 300.3153, "encoder_q-layer.5": 298.9812, "encoder_q-layer.6": 343.5884, "encoder_q-layer.7": 369.693, "encoder_q-layer.8": 417.1483, "encoder_q-layer.9": 348.3676, "epoch": 0.37, "inbatch_neg_score": 0.2292, "inbatch_pos_score": 0.8887, "learning_rate": 3.466666666666667e-05, "loss": 3.3031, "norm_diff": 0.0378, "norm_loss": 0.0, "num_token_doc": 66.737, "num_token_overlap": 15.8103, "num_token_query": 42.299, "num_token_union": 68.4539, "num_word_context": 202.2247, "num_word_doc": 49.8029, "num_word_query": 31.966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 559.5215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2288, "query_norm": 1.3705, "queue_k_norm": 1.4053, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.299, "sent_len_1": 66.737, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.305, "stdk": 0.0478, "stdq": 0.0449, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2883, "doc_norm": 1.4103, "encoder_q-embeddings": 426.0908, "encoder_q-layer.0": 289.4362, "encoder_q-layer.1": 329.794, "encoder_q-layer.10": 350.8963, "encoder_q-layer.11": 736.8818, "encoder_q-layer.2": 344.8165, "encoder_q-layer.3": 365.7303, "encoder_q-layer.4": 359.3264, "encoder_q-layer.5": 362.1332, "encoder_q-layer.6": 408.252, "encoder_q-layer.7": 427.5863, "encoder_q-layer.8": 380.3333, "encoder_q-layer.9": 328.6792, "epoch": 0.37, "inbatch_neg_score": 0.2278, "inbatch_pos_score": 0.8857, "learning_rate": 3.4611111111111114e-05, "loss": 3.2883, "norm_diff": 0.0435, "norm_loss": 0.0, "num_token_doc": 66.8035, "num_token_overlap": 15.7538, "num_token_query": 42.1303, "num_token_union": 68.4839, "num_word_context": 202.4657, "num_word_doc": 49.8984, "num_word_query": 31.8281, "postclip_grad_norm": 1.0, "preclip_grad_norm": 605.7871, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2274, "query_norm": 1.3667, "queue_k_norm": 1.4085, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1303, "sent_len_1": 66.8035, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.7537, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2935, "doc_norm": 1.4066, "encoder_q-embeddings": 590.2289, "encoder_q-layer.0": 494.5377, "encoder_q-layer.1": 537.8785, "encoder_q-layer.10": 326.4526, "encoder_q-layer.11": 737.2596, "encoder_q-layer.2": 549.2872, "encoder_q-layer.3": 519.1382, "encoder_q-layer.4": 533.8194, "encoder_q-layer.5": 434.1071, "encoder_q-layer.6": 498.925, "encoder_q-layer.7": 524.4213, "encoder_q-layer.8": 503.6166, "encoder_q-layer.9": 338.9101, "epoch": 0.37, "inbatch_neg_score": 0.2245, "inbatch_pos_score": 0.8838, "learning_rate": 3.4555555555555556e-05, "loss": 3.2935, "norm_diff": 0.0434, "norm_loss": 0.0, "num_token_doc": 66.7277, "num_token_overlap": 15.8942, "num_token_query": 42.5579, "num_token_union": 68.5647, "num_word_context": 202.2916, "num_word_doc": 49.7951, "num_word_query": 32.1483, "postclip_grad_norm": 1.0, "preclip_grad_norm": 779.8861, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2241, "query_norm": 1.3632, "queue_k_norm": 1.4074, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5579, "sent_len_1": 66.7277, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0737, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.3145, "doc_norm": 1.4066, "encoder_q-embeddings": 532.8128, "encoder_q-layer.0": 364.0301, "encoder_q-layer.1": 394.1848, "encoder_q-layer.10": 302.1272, "encoder_q-layer.11": 716.4995, "encoder_q-layer.2": 446.8828, "encoder_q-layer.3": 468.3017, "encoder_q-layer.4": 506.5392, "encoder_q-layer.5": 499.868, "encoder_q-layer.6": 489.1733, "encoder_q-layer.7": 533.8957, "encoder_q-layer.8": 443.3948, "encoder_q-layer.9": 336.8275, "epoch": 0.37, "inbatch_neg_score": 0.2241, "inbatch_pos_score": 0.8975, "learning_rate": 3.45e-05, "loss": 3.3145, "norm_diff": 0.0353, "norm_loss": 0.0, "num_token_doc": 66.8578, "num_token_overlap": 15.8489, "num_token_query": 42.4206, "num_token_union": 68.5784, "num_word_context": 202.238, "num_word_doc": 49.9217, "num_word_query": 32.0522, "postclip_grad_norm": 1.0, "preclip_grad_norm": 709.8228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.224, "query_norm": 1.3712, "queue_k_norm": 1.409, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4206, "sent_len_1": 66.8578, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.5525, "stdk": 0.0477, "stdq": 0.045, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.3042, "doc_norm": 1.4059, "encoder_q-embeddings": 365.8373, "encoder_q-layer.0": 234.0749, "encoder_q-layer.1": 268.178, "encoder_q-layer.10": 294.5957, "encoder_q-layer.11": 703.2681, "encoder_q-layer.2": 304.1306, "encoder_q-layer.3": 326.2097, "encoder_q-layer.4": 326.5977, "encoder_q-layer.5": 332.1447, "encoder_q-layer.6": 343.8843, "encoder_q-layer.7": 362.6247, "encoder_q-layer.8": 367.13, "encoder_q-layer.9": 305.8781, "epoch": 0.37, "inbatch_neg_score": 0.2266, "inbatch_pos_score": 0.8896, "learning_rate": 3.444444444444445e-05, "loss": 3.3042, "norm_diff": 0.0133, "norm_loss": 0.0, "num_token_doc": 66.8271, "num_token_overlap": 15.8105, "num_token_query": 42.3181, "num_token_union": 68.5338, "num_word_context": 202.3423, "num_word_doc": 49.8579, "num_word_query": 31.9941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 548.6876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2262, "query_norm": 1.3934, "queue_k_norm": 1.4086, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3181, "sent_len_1": 66.8271, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1912, "stdk": 0.0477, "stdq": 0.0457, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.2909, "doc_norm": 1.4063, "encoder_q-embeddings": 377.9339, "encoder_q-layer.0": 248.3421, "encoder_q-layer.1": 255.6907, "encoder_q-layer.10": 291.3582, "encoder_q-layer.11": 680.9486, "encoder_q-layer.2": 271.6376, "encoder_q-layer.3": 284.5333, "encoder_q-layer.4": 306.04, "encoder_q-layer.5": 316.2389, "encoder_q-layer.6": 319.8454, "encoder_q-layer.7": 322.6966, "encoder_q-layer.8": 345.5246, "encoder_q-layer.9": 292.094, "epoch": 0.37, "inbatch_neg_score": 0.2247, "inbatch_pos_score": 0.8691, "learning_rate": 3.438888888888889e-05, "loss": 3.2909, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.6033, "num_token_overlap": 15.8403, "num_token_query": 42.3534, "num_token_union": 68.3533, "num_word_context": 202.2502, "num_word_doc": 49.6813, "num_word_query": 31.9805, "postclip_grad_norm": 1.0, "preclip_grad_norm": 527.109, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2244, "query_norm": 1.3545, "queue_k_norm": 1.4078, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3534, "sent_len_1": 66.6033, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0312, "stdk": 0.0477, "stdq": 0.0443, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 52.2461, "active_queue_size": 16384.0, "cl_loss": 3.2944, "doc_norm": 1.4006, "encoder_q-embeddings": 445.199, "encoder_q-layer.0": 321.6501, "encoder_q-layer.1": 377.7109, "encoder_q-layer.10": 294.9104, "encoder_q-layer.11": 711.1511, "encoder_q-layer.2": 456.531, "encoder_q-layer.3": 463.573, "encoder_q-layer.4": 491.1459, "encoder_q-layer.5": 478.9282, "encoder_q-layer.6": 507.6146, "encoder_q-layer.7": 485.2254, "encoder_q-layer.8": 463.7618, "encoder_q-layer.9": 336.0064, "epoch": 0.37, "inbatch_neg_score": 0.2309, "inbatch_pos_score": 0.8604, "learning_rate": 3.433333333333333e-05, "loss": 3.2944, "norm_diff": 0.0285, "norm_loss": 0.0, "num_token_doc": 66.7531, "num_token_overlap": 15.8269, "num_token_query": 42.1821, "num_token_union": 68.3962, "num_word_context": 202.1118, "num_word_doc": 49.8301, "num_word_query": 31.8677, "postclip_grad_norm": 1.0, "preclip_grad_norm": 684.0686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2302, "query_norm": 1.3771, "queue_k_norm": 1.4104, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1821, "sent_len_1": 66.7531, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7763, "stdk": 0.0475, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.291, "doc_norm": 1.4136, "encoder_q-embeddings": 361.6725, "encoder_q-layer.0": 234.9345, "encoder_q-layer.1": 271.4101, "encoder_q-layer.10": 333.6699, "encoder_q-layer.11": 696.6312, "encoder_q-layer.2": 311.698, "encoder_q-layer.3": 331.832, "encoder_q-layer.4": 352.3717, "encoder_q-layer.5": 360.56, "encoder_q-layer.6": 381.8863, "encoder_q-layer.7": 397.302, "encoder_q-layer.8": 398.3923, "encoder_q-layer.9": 317.0898, "epoch": 0.37, "inbatch_neg_score": 0.2286, "inbatch_pos_score": 0.8882, "learning_rate": 3.427777777777778e-05, "loss": 3.291, "norm_diff": 0.0485, "norm_loss": 0.0, "num_token_doc": 66.7064, "num_token_overlap": 15.8478, "num_token_query": 42.2789, "num_token_union": 68.3739, "num_word_context": 201.8643, "num_word_doc": 49.7712, "num_word_query": 31.9326, "postclip_grad_norm": 1.0, "preclip_grad_norm": 566.385, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2289, "query_norm": 1.3651, "queue_k_norm": 1.4112, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2789, "sent_len_1": 66.7064, "sent_len_max_0": 128.0, "sent_len_max_1": 187.76, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.2904, "doc_norm": 1.407, "encoder_q-embeddings": 292.6913, "encoder_q-layer.0": 196.1366, "encoder_q-layer.1": 217.7609, "encoder_q-layer.10": 314.4469, "encoder_q-layer.11": 723.9203, "encoder_q-layer.2": 248.107, "encoder_q-layer.3": 266.1707, "encoder_q-layer.4": 289.4747, "encoder_q-layer.5": 263.9463, "encoder_q-layer.6": 291.2874, "encoder_q-layer.7": 312.5769, "encoder_q-layer.8": 332.9933, "encoder_q-layer.9": 291.7478, "epoch": 0.37, "inbatch_neg_score": 0.2298, "inbatch_pos_score": 0.8701, "learning_rate": 3.4222222222222224e-05, "loss": 3.2904, "norm_diff": 0.039, "norm_loss": 0.0, "num_token_doc": 66.8146, "num_token_overlap": 15.855, "num_token_query": 42.3952, "num_token_union": 68.5474, "num_word_context": 202.2363, "num_word_doc": 49.8769, "num_word_query": 32.0409, "postclip_grad_norm": 1.0, "preclip_grad_norm": 504.0246, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2307, "query_norm": 1.3681, "queue_k_norm": 1.4087, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3952, "sent_len_1": 66.8146, "sent_len_max_0": 127.99, "sent_len_max_1": 188.5337, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.2806, "doc_norm": 1.4112, "encoder_q-embeddings": 346.364, "encoder_q-layer.0": 244.3532, "encoder_q-layer.1": 263.2634, "encoder_q-layer.10": 313.7537, "encoder_q-layer.11": 720.5971, "encoder_q-layer.2": 301.4788, "encoder_q-layer.3": 306.5917, "encoder_q-layer.4": 303.5316, "encoder_q-layer.5": 313.9648, "encoder_q-layer.6": 330.9833, "encoder_q-layer.7": 324.0352, "encoder_q-layer.8": 369.6584, "encoder_q-layer.9": 302.4038, "epoch": 0.38, "inbatch_neg_score": 0.2314, "inbatch_pos_score": 0.874, "learning_rate": 3.4166666666666666e-05, "loss": 3.2806, "norm_diff": 0.0595, "norm_loss": 0.0, "num_token_doc": 66.7415, "num_token_overlap": 15.8979, "num_token_query": 42.5116, "num_token_union": 68.5472, "num_word_context": 202.5394, "num_word_doc": 49.7873, "num_word_query": 32.1267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 534.9079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.231, "query_norm": 1.3517, "queue_k_norm": 1.4088, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5116, "sent_len_1": 66.7415, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0675, "stdk": 0.0479, "stdq": 0.0441, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.2755, "doc_norm": 1.4125, "encoder_q-embeddings": 3506.8389, "encoder_q-layer.0": 2266.6665, "encoder_q-layer.1": 2708.4358, "encoder_q-layer.10": 323.3484, "encoder_q-layer.11": 665.0485, "encoder_q-layer.2": 3017.3726, "encoder_q-layer.3": 2499.3335, "encoder_q-layer.4": 2585.0515, "encoder_q-layer.5": 1852.6913, "encoder_q-layer.6": 1326.5653, "encoder_q-layer.7": 1101.0479, "encoder_q-layer.8": 841.5442, "encoder_q-layer.9": 464.097, "epoch": 0.38, "inbatch_neg_score": 0.2232, "inbatch_pos_score": 0.875, "learning_rate": 3.411111111111111e-05, "loss": 3.2755, "norm_diff": 0.0505, "norm_loss": 0.0, "num_token_doc": 66.8906, "num_token_overlap": 15.8516, "num_token_query": 42.3438, "num_token_union": 68.5319, "num_word_context": 202.4253, "num_word_doc": 49.9037, "num_word_query": 31.9903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3271.1919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2228, "query_norm": 1.362, "queue_k_norm": 1.409, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3438, "sent_len_1": 66.8906, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7537, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.3023, "doc_norm": 1.4093, "encoder_q-embeddings": 724.3055, "encoder_q-layer.0": 471.3883, "encoder_q-layer.1": 526.6067, "encoder_q-layer.10": 634.5427, "encoder_q-layer.11": 1282.575, "encoder_q-layer.2": 570.2142, "encoder_q-layer.3": 595.2571, "encoder_q-layer.4": 624.4125, "encoder_q-layer.5": 612.6764, "encoder_q-layer.6": 674.7105, "encoder_q-layer.7": 689.6907, "encoder_q-layer.8": 741.4906, "encoder_q-layer.9": 589.278, "epoch": 0.38, "inbatch_neg_score": 0.2258, "inbatch_pos_score": 0.8916, "learning_rate": 3.405555555555556e-05, "loss": 3.3023, "norm_diff": 0.0454, "norm_loss": 0.0, "num_token_doc": 66.988, "num_token_overlap": 15.8121, "num_token_query": 42.3756, "num_token_union": 68.6255, "num_word_context": 202.6413, "num_word_doc": 49.9564, "num_word_query": 32.0037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1043.284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.225, "query_norm": 1.3639, "queue_k_norm": 1.4085, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3756, "sent_len_1": 66.988, "sent_len_max_0": 128.0, "sent_len_max_1": 192.1912, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.2709, "doc_norm": 1.412, "encoder_q-embeddings": 639.8473, "encoder_q-layer.0": 431.1583, "encoder_q-layer.1": 462.7903, "encoder_q-layer.10": 669.7487, "encoder_q-layer.11": 1481.4065, "encoder_q-layer.2": 526.5746, "encoder_q-layer.3": 539.8734, "encoder_q-layer.4": 580.4987, "encoder_q-layer.5": 583.3427, "encoder_q-layer.6": 603.9521, "encoder_q-layer.7": 639.6754, "encoder_q-layer.8": 717.8662, "encoder_q-layer.9": 622.6083, "epoch": 0.38, "inbatch_neg_score": 0.2181, "inbatch_pos_score": 0.8906, "learning_rate": 3.4000000000000007e-05, "loss": 3.2709, "norm_diff": 0.0468, "norm_loss": 0.0, "num_token_doc": 66.912, "num_token_overlap": 15.8977, "num_token_query": 42.4261, "num_token_union": 68.5372, "num_word_context": 202.5297, "num_word_doc": 49.947, "num_word_query": 32.033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1034.0811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2189, "query_norm": 1.3652, "queue_k_norm": 1.41, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4261, "sent_len_1": 66.912, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.36, "stdk": 0.0479, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.271, "doc_norm": 1.4064, "encoder_q-embeddings": 741.297, "encoder_q-layer.0": 498.2304, "encoder_q-layer.1": 555.7618, "encoder_q-layer.10": 610.2679, "encoder_q-layer.11": 1364.08, "encoder_q-layer.2": 662.7732, "encoder_q-layer.3": 687.7876, "encoder_q-layer.4": 676.6143, "encoder_q-layer.5": 664.1144, "encoder_q-layer.6": 730.2034, "encoder_q-layer.7": 738.4042, "encoder_q-layer.8": 781.9261, "encoder_q-layer.9": 616.544, "epoch": 0.38, "inbatch_neg_score": 0.2221, "inbatch_pos_score": 0.8599, "learning_rate": 3.394444444444444e-05, "loss": 3.271, "norm_diff": 0.0445, "norm_loss": 0.0, "num_token_doc": 66.6003, "num_token_overlap": 15.76, "num_token_query": 42.1901, "num_token_union": 68.3902, "num_word_context": 202.2519, "num_word_doc": 49.7267, "num_word_query": 31.8643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1110.8718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2214, "query_norm": 1.3619, "queue_k_norm": 1.4083, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1901, "sent_len_1": 66.6003, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1912, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2909, "doc_norm": 1.4127, "encoder_q-embeddings": 833.0571, "encoder_q-layer.0": 562.0206, "encoder_q-layer.1": 675.9155, "encoder_q-layer.10": 681.3181, "encoder_q-layer.11": 1409.6829, "encoder_q-layer.2": 829.4521, "encoder_q-layer.3": 861.4877, "encoder_q-layer.4": 979.9407, "encoder_q-layer.5": 1029.3262, "encoder_q-layer.6": 1015.8819, "encoder_q-layer.7": 979.1061, "encoder_q-layer.8": 992.2574, "encoder_q-layer.9": 744.5444, "epoch": 0.38, "inbatch_neg_score": 0.2146, "inbatch_pos_score": 0.8706, "learning_rate": 3.388888888888889e-05, "loss": 3.2909, "norm_diff": 0.038, "norm_loss": 0.0, "num_token_doc": 66.7458, "num_token_overlap": 15.8108, "num_token_query": 42.2472, "num_token_union": 68.4495, "num_word_context": 202.4214, "num_word_doc": 49.8236, "num_word_query": 31.8864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1345.8182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2158, "query_norm": 1.3747, "queue_k_norm": 1.4075, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2472, "sent_len_1": 66.7458, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.0825, "stdk": 0.048, "stdq": 0.0451, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2754, "doc_norm": 1.4139, "encoder_q-embeddings": 714.6962, "encoder_q-layer.0": 467.1484, "encoder_q-layer.1": 519.7076, "encoder_q-layer.10": 618.7065, "encoder_q-layer.11": 1378.5674, "encoder_q-layer.2": 583.6255, "encoder_q-layer.3": 600.4828, "encoder_q-layer.4": 636.6022, "encoder_q-layer.5": 607.1515, "encoder_q-layer.6": 658.1525, "encoder_q-layer.7": 657.3703, "encoder_q-layer.8": 676.5022, "encoder_q-layer.9": 579.1311, "epoch": 0.38, "inbatch_neg_score": 0.2223, "inbatch_pos_score": 0.874, "learning_rate": 3.3833333333333334e-05, "loss": 3.2754, "norm_diff": 0.0391, "norm_loss": 0.0, "num_token_doc": 66.875, "num_token_overlap": 15.8317, "num_token_query": 42.3742, "num_token_union": 68.5579, "num_word_context": 202.6818, "num_word_doc": 49.8452, "num_word_query": 31.9829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1048.0786, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2229, "query_norm": 1.3748, "queue_k_norm": 1.4096, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3742, "sent_len_1": 66.875, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9938, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.3035, "doc_norm": 1.4082, "encoder_q-embeddings": 1166.2789, "encoder_q-layer.0": 771.722, "encoder_q-layer.1": 826.8408, "encoder_q-layer.10": 608.9186, "encoder_q-layer.11": 1350.9979, "encoder_q-layer.2": 974.8055, "encoder_q-layer.3": 1012.8397, "encoder_q-layer.4": 1006.9566, "encoder_q-layer.5": 859.2962, "encoder_q-layer.6": 842.0443, "encoder_q-layer.7": 834.7305, "encoder_q-layer.8": 804.4515, "encoder_q-layer.9": 614.4681, "epoch": 0.38, "inbatch_neg_score": 0.2253, "inbatch_pos_score": 0.8965, "learning_rate": 3.377777777777778e-05, "loss": 3.3035, "norm_diff": 0.0187, "norm_loss": 0.0, "num_token_doc": 66.628, "num_token_overlap": 15.7627, "num_token_query": 42.2149, "num_token_union": 68.3865, "num_word_context": 201.9044, "num_word_doc": 49.7, "num_word_query": 31.9054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1381.5714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2252, "query_norm": 1.3895, "queue_k_norm": 1.4081, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2149, "sent_len_1": 66.628, "sent_len_max_0": 127.995, "sent_len_max_1": 190.3038, "stdk": 0.0478, "stdq": 0.0457, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2813, "doc_norm": 1.4083, "encoder_q-embeddings": 769.4219, "encoder_q-layer.0": 506.7164, "encoder_q-layer.1": 542.9738, "encoder_q-layer.10": 589.6108, "encoder_q-layer.11": 1343.412, "encoder_q-layer.2": 623.0083, "encoder_q-layer.3": 612.525, "encoder_q-layer.4": 634.6315, "encoder_q-layer.5": 623.6372, "encoder_q-layer.6": 651.8356, "encoder_q-layer.7": 653.1386, "encoder_q-layer.8": 705.8377, "encoder_q-layer.9": 600.1155, "epoch": 0.38, "inbatch_neg_score": 0.2274, "inbatch_pos_score": 0.8623, "learning_rate": 3.3722222222222225e-05, "loss": 3.2813, "norm_diff": 0.042, "norm_loss": 0.0, "num_token_doc": 66.929, "num_token_overlap": 15.8826, "num_token_query": 42.3774, "num_token_union": 68.5371, "num_word_context": 202.9116, "num_word_doc": 49.9138, "num_word_query": 31.9997, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1073.1225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2264, "query_norm": 1.3663, "queue_k_norm": 1.4085, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3774, "sent_len_1": 66.929, "sent_len_max_0": 128.0, "sent_len_max_1": 189.09, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.2608, "doc_norm": 1.4109, "encoder_q-embeddings": 842.0367, "encoder_q-layer.0": 538.7055, "encoder_q-layer.1": 652.1828, "encoder_q-layer.10": 648.7626, "encoder_q-layer.11": 1432.8375, "encoder_q-layer.2": 699.7781, "encoder_q-layer.3": 719.9109, "encoder_q-layer.4": 722.3983, "encoder_q-layer.5": 750.3628, "encoder_q-layer.6": 825.8083, "encoder_q-layer.7": 774.5615, "encoder_q-layer.8": 780.7047, "encoder_q-layer.9": 619.0648, "epoch": 0.38, "inbatch_neg_score": 0.2266, "inbatch_pos_score": 0.8618, "learning_rate": 3.366666666666667e-05, "loss": 3.2608, "norm_diff": 0.0807, "norm_loss": 0.0, "num_token_doc": 66.7744, "num_token_overlap": 15.8193, "num_token_query": 42.3524, "num_token_union": 68.5171, "num_word_context": 202.3482, "num_word_doc": 49.848, "num_word_query": 31.9994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1187.5109, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2252, "query_norm": 1.3302, "queue_k_norm": 1.4081, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3524, "sent_len_1": 66.7744, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4487, "stdk": 0.0479, "stdq": 0.0436, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 51.9531, "active_queue_size": 16384.0, "cl_loss": 3.2779, "doc_norm": 1.4088, "encoder_q-embeddings": 943.5778, "encoder_q-layer.0": 631.8465, "encoder_q-layer.1": 610.9575, "encoder_q-layer.10": 634.4946, "encoder_q-layer.11": 1426.4668, "encoder_q-layer.2": 657.9347, "encoder_q-layer.3": 645.8051, "encoder_q-layer.4": 634.0798, "encoder_q-layer.5": 630.5052, "encoder_q-layer.6": 679.6718, "encoder_q-layer.7": 712.7235, "encoder_q-layer.8": 768.3307, "encoder_q-layer.9": 621.9254, "epoch": 0.39, "inbatch_neg_score": 0.2265, "inbatch_pos_score": 0.8486, "learning_rate": 3.3611111111111116e-05, "loss": 3.2779, "norm_diff": 0.0691, "norm_loss": 0.0, "num_token_doc": 66.9377, "num_token_overlap": 15.8407, "num_token_query": 42.4001, "num_token_union": 68.6096, "num_word_context": 202.4924, "num_word_doc": 49.9061, "num_word_query": 32.0416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1180.3989, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.226, "query_norm": 1.3397, "queue_k_norm": 1.4092, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4001, "sent_len_1": 66.9377, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.3013, "stdk": 0.0478, "stdq": 0.0439, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.262, "doc_norm": 1.4052, "encoder_q-embeddings": 623.7946, "encoder_q-layer.0": 404.6221, "encoder_q-layer.1": 414.4518, "encoder_q-layer.10": 625.7971, "encoder_q-layer.11": 1470.1754, "encoder_q-layer.2": 467.2437, "encoder_q-layer.3": 475.7787, "encoder_q-layer.4": 524.0723, "encoder_q-layer.5": 484.754, "encoder_q-layer.6": 562.4719, "encoder_q-layer.7": 646.759, "encoder_q-layer.8": 705.3254, "encoder_q-layer.9": 626.0283, "epoch": 0.39, "inbatch_neg_score": 0.2296, "inbatch_pos_score": 0.8584, "learning_rate": 3.355555555555556e-05, "loss": 3.262, "norm_diff": 0.0453, "norm_loss": 0.0, "num_token_doc": 66.8386, "num_token_overlap": 15.8961, "num_token_query": 42.3696, "num_token_union": 68.4232, "num_word_context": 202.1543, "num_word_doc": 49.8772, "num_word_query": 32.0054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1020.9321, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2301, "query_norm": 1.36, "queue_k_norm": 1.4091, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3696, "sent_len_1": 66.8386, "sent_len_max_0": 127.99, "sent_len_max_1": 191.4787, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.2722, "doc_norm": 1.4166, "encoder_q-embeddings": 735.9534, "encoder_q-layer.0": 501.9011, "encoder_q-layer.1": 532.8539, "encoder_q-layer.10": 609.6745, "encoder_q-layer.11": 1364.4272, "encoder_q-layer.2": 591.4424, "encoder_q-layer.3": 605.1064, "encoder_q-layer.4": 614.2684, "encoder_q-layer.5": 654.1147, "encoder_q-layer.6": 668.5951, "encoder_q-layer.7": 663.2639, "encoder_q-layer.8": 722.1472, "encoder_q-layer.9": 600.7774, "epoch": 0.39, "inbatch_neg_score": 0.2276, "inbatch_pos_score": 0.8818, "learning_rate": 3.35e-05, "loss": 3.2722, "norm_diff": 0.0448, "norm_loss": 0.0, "num_token_doc": 66.5185, "num_token_overlap": 15.7622, "num_token_query": 42.1558, "num_token_union": 68.2851, "num_word_context": 202.055, "num_word_doc": 49.6313, "num_word_query": 31.8417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1068.7707, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2274, "query_norm": 1.3718, "queue_k_norm": 1.4087, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1558, "sent_len_1": 66.5185, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.1662, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.2646, "doc_norm": 1.414, "encoder_q-embeddings": 686.3676, "encoder_q-layer.0": 456.9247, "encoder_q-layer.1": 533.8123, "encoder_q-layer.10": 679.2025, "encoder_q-layer.11": 1409.8729, "encoder_q-layer.2": 625.9494, "encoder_q-layer.3": 646.5544, "encoder_q-layer.4": 652.3767, "encoder_q-layer.5": 653.6816, "encoder_q-layer.6": 705.9565, "encoder_q-layer.7": 747.451, "encoder_q-layer.8": 844.1588, "encoder_q-layer.9": 715.1859, "epoch": 0.39, "inbatch_neg_score": 0.2359, "inbatch_pos_score": 0.917, "learning_rate": 3.3444444444444443e-05, "loss": 3.2646, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.88, "num_token_overlap": 15.9347, "num_token_query": 42.5388, "num_token_union": 68.5704, "num_word_context": 202.2461, "num_word_doc": 49.8459, "num_word_query": 32.1037, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1108.7401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.236, "query_norm": 1.4205, "queue_k_norm": 1.4104, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5388, "sent_len_1": 66.88, "sent_len_max_0": 128.0, "sent_len_max_1": 192.8088, "stdk": 0.048, "stdq": 0.0466, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2708, "doc_norm": 1.4097, "encoder_q-embeddings": 566.8007, "encoder_q-layer.0": 357.655, "encoder_q-layer.1": 379.349, "encoder_q-layer.10": 616.0112, "encoder_q-layer.11": 1386.5453, "encoder_q-layer.2": 430.825, "encoder_q-layer.3": 434.3288, "encoder_q-layer.4": 451.7663, "encoder_q-layer.5": 476.0555, "encoder_q-layer.6": 511.6076, "encoder_q-layer.7": 556.3023, "encoder_q-layer.8": 645.2275, "encoder_q-layer.9": 596.8172, "epoch": 0.39, "inbatch_neg_score": 0.2423, "inbatch_pos_score": 0.9038, "learning_rate": 3.338888888888889e-05, "loss": 3.2708, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.7159, "num_token_overlap": 15.769, "num_token_query": 42.2894, "num_token_union": 68.4614, "num_word_context": 202.3765, "num_word_doc": 49.7542, "num_word_query": 31.9389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.9058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2433, "query_norm": 1.3906, "queue_k_norm": 1.4107, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2894, "sent_len_1": 66.7159, "sent_len_max_0": 127.995, "sent_len_max_1": 191.4675, "stdk": 0.0478, "stdq": 0.0451, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2817, "doc_norm": 1.412, "encoder_q-embeddings": 676.5978, "encoder_q-layer.0": 455.6493, "encoder_q-layer.1": 502.5844, "encoder_q-layer.10": 667.576, "encoder_q-layer.11": 1409.2007, "encoder_q-layer.2": 607.4343, "encoder_q-layer.3": 639.0738, "encoder_q-layer.4": 689.246, "encoder_q-layer.5": 664.1308, "encoder_q-layer.6": 704.2496, "encoder_q-layer.7": 781.2095, "encoder_q-layer.8": 770.5355, "encoder_q-layer.9": 603.4979, "epoch": 0.39, "inbatch_neg_score": 0.2417, "inbatch_pos_score": 0.8794, "learning_rate": 3.3333333333333335e-05, "loss": 3.2817, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.5971, "num_token_overlap": 15.7561, "num_token_query": 42.107, "num_token_union": 68.2646, "num_word_context": 202.0361, "num_word_doc": 49.7108, "num_word_query": 31.7894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1100.0585, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.24, "query_norm": 1.371, "queue_k_norm": 1.4102, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.107, "sent_len_1": 66.5971, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7962, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 26.717, "dev_samples_per_second": 2.395, "dev_steps_per_second": 0.037, "epoch": 0.39, "step": 40000, "test_accuracy": 93.22509765625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3908320367336273, "test_doc_norm": 1.3949778079986572, "test_inbatch_neg_score": 0.58208167552948, "test_inbatch_pos_score": 1.4769785404205322, "test_loss": 0.3908320367336273, "test_loss_align": 1.1077816486358643, "test_loss_unif": 3.8722572326660156, "test_loss_unif_q@queue": 3.8722572326660156, "test_norm_diff": 0.04877135902643204, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.23521128296852112, "test_query_norm": 1.4437490701675415, "test_queue_k_norm": 1.4101650714874268, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04200267791748047, "test_stdq": 0.04165022447705269, "test_stdqueue_k": 0.04793920740485191, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.717, "dev_samples_per_second": 2.395, "dev_steps_per_second": 0.037, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.38867, "eval_beir-arguana_recall@10": 0.65007, "eval_beir-arguana_recall@100": 0.93954, "eval_beir-arguana_recall@20": 0.79445, "eval_beir-avg_ndcg@10": 0.3627755833333333, "eval_beir-avg_recall@10": 0.4361805833333333, "eval_beir-avg_recall@100": 0.6123057499999999, "eval_beir-avg_recall@20": 0.49258283333333336, "eval_beir-cqadupstack_ndcg@10": 0.24592583333333332, "eval_beir-cqadupstack_recall@10": 0.33888583333333333, "eval_beir-cqadupstack_recall@100": 0.5642475, "eval_beir-cqadupstack_recall@20": 0.40527833333333324, "eval_beir-fiqa_ndcg@10": 0.22053, "eval_beir-fiqa_recall@10": 0.29178, "eval_beir-fiqa_recall@100": 0.54544, "eval_beir-fiqa_recall@20": 0.35377, "eval_beir-nfcorpus_ndcg@10": 0.26953, "eval_beir-nfcorpus_recall@10": 0.12804, "eval_beir-nfcorpus_recall@100": 0.26207, "eval_beir-nfcorpus_recall@20": 0.159, "eval_beir-nq_ndcg@10": 0.25059, "eval_beir-nq_recall@10": 0.41362, "eval_beir-nq_recall@100": 0.75674, "eval_beir-nq_recall@20": 0.51475, "eval_beir-quora_ndcg@10": 0.75811, "eval_beir-quora_recall@10": 0.8723, "eval_beir-quora_recall@100": 0.9727, "eval_beir-quora_recall@20": 0.91651, "eval_beir-scidocs_ndcg@10": 0.13486, "eval_beir-scidocs_recall@10": 0.14443, "eval_beir-scidocs_recall@100": 0.34288, "eval_beir-scidocs_recall@20": 0.19812, "eval_beir-scifact_ndcg@10": 0.61794, "eval_beir-scifact_recall@10": 0.78389, "eval_beir-scifact_recall@100": 0.89433, "eval_beir-scifact_recall@20": 0.82411, "eval_beir-trec-covid_ndcg@10": 0.53716, "eval_beir-trec-covid_recall@10": 0.59, "eval_beir-trec-covid_recall@100": 0.4196, "eval_beir-trec-covid_recall@20": 0.563, "eval_beir-webis-touche2020_ndcg@10": 0.20444, "eval_beir-webis-touche2020_recall@10": 0.14879, "eval_beir-webis-touche2020_recall@100": 0.42551, "eval_beir-webis-touche2020_recall@20": 0.19684, "eval_senteval-avg_sts": 0.7504003749499918, "eval_senteval-sickr_spearman": 0.7147225301818074, "eval_senteval-stsb_spearman": 0.7860782197181762, "step": 40000, "test_accuracy": 93.22509765625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3908320367336273, "test_doc_norm": 1.3949778079986572, "test_inbatch_neg_score": 0.58208167552948, "test_inbatch_pos_score": 1.4769785404205322, "test_loss": 0.3908320367336273, "test_loss_align": 1.1077816486358643, "test_loss_unif": 3.8722572326660156, "test_loss_unif_q@queue": 3.8722572326660156, "test_norm_diff": 0.04877135902643204, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.23521128296852112, "test_query_norm": 1.4437490701675415, "test_queue_k_norm": 1.4101650714874268, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04200267791748047, "test_stdq": 0.04165022447705269, "test_stdqueue_k": 0.04793920740485191, "test_stdqueue_q": 0.0 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2746, "doc_norm": 1.4132, "encoder_q-embeddings": 647.4318, "encoder_q-layer.0": 411.5047, "encoder_q-layer.1": 445.8982, "encoder_q-layer.10": 683.5379, "encoder_q-layer.11": 1474.4481, "encoder_q-layer.2": 493.706, "encoder_q-layer.3": 535.4159, "encoder_q-layer.4": 559.1897, "encoder_q-layer.5": 547.4303, "encoder_q-layer.6": 595.227, "encoder_q-layer.7": 654.9308, "encoder_q-layer.8": 766.5245, "encoder_q-layer.9": 690.9652, "epoch": 0.39, "inbatch_neg_score": 0.2409, "inbatch_pos_score": 0.8945, "learning_rate": 3.327777777777778e-05, "loss": 3.2746, "norm_diff": 0.0352, "norm_loss": 0.0, "num_token_doc": 66.5425, "num_token_overlap": 15.7893, "num_token_query": 42.3689, "num_token_union": 68.3921, "num_word_context": 201.9837, "num_word_doc": 49.6281, "num_word_query": 31.9894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1034.2775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2419, "query_norm": 1.378, "queue_k_norm": 1.4114, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3689, "sent_len_1": 66.5425, "sent_len_max_0": 128.0, "sent_len_max_1": 189.67, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.2671, "doc_norm": 1.4131, "encoder_q-embeddings": 688.2463, "encoder_q-layer.0": 437.5587, "encoder_q-layer.1": 476.9894, "encoder_q-layer.10": 660.8907, "encoder_q-layer.11": 1432.1113, "encoder_q-layer.2": 524.7036, "encoder_q-layer.3": 558.9708, "encoder_q-layer.4": 603.2514, "encoder_q-layer.5": 599.1056, "encoder_q-layer.6": 579.1768, "encoder_q-layer.7": 634.8591, "encoder_q-layer.8": 668.46, "encoder_q-layer.9": 597.0165, "epoch": 0.39, "inbatch_neg_score": 0.2505, "inbatch_pos_score": 0.9204, "learning_rate": 3.322222222222222e-05, "loss": 3.2671, "norm_diff": 0.0263, "norm_loss": 0.0, "num_token_doc": 66.747, "num_token_overlap": 15.7735, "num_token_query": 42.112, "num_token_union": 68.3616, "num_word_context": 202.0513, "num_word_doc": 49.8016, "num_word_query": 31.8049, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1061.394, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2505, "query_norm": 1.3873, "queue_k_norm": 1.4135, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.112, "sent_len_1": 66.747, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.4737, "stdk": 0.0479, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2577, "doc_norm": 1.4153, "encoder_q-embeddings": 590.8837, "encoder_q-layer.0": 403.4222, "encoder_q-layer.1": 439.3219, "encoder_q-layer.10": 596.673, "encoder_q-layer.11": 1360.35, "encoder_q-layer.2": 503.6333, "encoder_q-layer.3": 501.3987, "encoder_q-layer.4": 553.2208, "encoder_q-layer.5": 540.5993, "encoder_q-layer.6": 553.8601, "encoder_q-layer.7": 594.2847, "encoder_q-layer.8": 676.5486, "encoder_q-layer.9": 610.296, "epoch": 0.39, "inbatch_neg_score": 0.2485, "inbatch_pos_score": 0.915, "learning_rate": 3.316666666666667e-05, "loss": 3.2577, "norm_diff": 0.0248, "norm_loss": 0.0, "num_token_doc": 66.8058, "num_token_overlap": 15.7888, "num_token_query": 42.2633, "num_token_union": 68.4617, "num_word_context": 202.3722, "num_word_doc": 49.8467, "num_word_query": 31.9025, "postclip_grad_norm": 1.0, "preclip_grad_norm": 969.4542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2483, "query_norm": 1.3904, "queue_k_norm": 1.4144, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2633, "sent_len_1": 66.8058, "sent_len_max_0": 128.0, "sent_len_max_1": 191.7875, "stdk": 0.048, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.2502, "doc_norm": 1.4149, "encoder_q-embeddings": 599.32, "encoder_q-layer.0": 390.4247, "encoder_q-layer.1": 423.189, "encoder_q-layer.10": 603.512, "encoder_q-layer.11": 1363.137, "encoder_q-layer.2": 447.3535, "encoder_q-layer.3": 468.2915, "encoder_q-layer.4": 493.1779, "encoder_q-layer.5": 514.6941, "encoder_q-layer.6": 540.8757, "encoder_q-layer.7": 579.2944, "encoder_q-layer.8": 692.879, "encoder_q-layer.9": 595.2028, "epoch": 0.39, "inbatch_neg_score": 0.2462, "inbatch_pos_score": 0.9121, "learning_rate": 3.311111111111112e-05, "loss": 3.2502, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.8811, "num_token_overlap": 15.8229, "num_token_query": 42.408, "num_token_union": 68.6573, "num_word_context": 202.526, "num_word_doc": 49.9239, "num_word_query": 32.059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 962.5314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2466, "query_norm": 1.4029, "queue_k_norm": 1.4159, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.408, "sent_len_1": 66.8811, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1362, "stdk": 0.048, "stdq": 0.0452, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.2649, "doc_norm": 1.4183, "encoder_q-embeddings": 935.9156, "encoder_q-layer.0": 635.0877, "encoder_q-layer.1": 747.3676, "encoder_q-layer.10": 569.8187, "encoder_q-layer.11": 1332.5488, "encoder_q-layer.2": 850.1181, "encoder_q-layer.3": 877.1269, "encoder_q-layer.4": 912.364, "encoder_q-layer.5": 839.9707, "encoder_q-layer.6": 793.1344, "encoder_q-layer.7": 777.168, "encoder_q-layer.8": 830.3745, "encoder_q-layer.9": 632.8658, "epoch": 0.4, "inbatch_neg_score": 0.2462, "inbatch_pos_score": 0.9189, "learning_rate": 3.3055555555555553e-05, "loss": 3.2649, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.8992, "num_token_overlap": 15.9085, "num_token_query": 42.4041, "num_token_union": 68.5197, "num_word_context": 202.094, "num_word_doc": 49.9194, "num_word_query": 32.0171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1272.4298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2472, "query_norm": 1.3995, "queue_k_norm": 1.4153, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4041, "sent_len_1": 66.8992, "sent_len_max_0": 128.0, "sent_len_max_1": 189.245, "stdk": 0.0481, "stdq": 0.0452, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2718, "doc_norm": 1.4056, "encoder_q-embeddings": 582.6235, "encoder_q-layer.0": 377.787, "encoder_q-layer.1": 408.1959, "encoder_q-layer.10": 620.9248, "encoder_q-layer.11": 1398.9202, "encoder_q-layer.2": 442.376, "encoder_q-layer.3": 453.0388, "encoder_q-layer.4": 470.9343, "encoder_q-layer.5": 472.2628, "encoder_q-layer.6": 533.0404, "encoder_q-layer.7": 582.264, "encoder_q-layer.8": 691.9377, "encoder_q-layer.9": 617.537, "epoch": 0.4, "inbatch_neg_score": 0.2403, "inbatch_pos_score": 0.8965, "learning_rate": 3.3e-05, "loss": 3.2718, "norm_diff": 0.0428, "norm_loss": 0.0, "num_token_doc": 66.8178, "num_token_overlap": 15.8063, "num_token_query": 42.3004, "num_token_union": 68.4528, "num_word_context": 202.097, "num_word_doc": 49.7943, "num_word_query": 31.9317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 964.2805, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2412, "query_norm": 1.3627, "queue_k_norm": 1.4141, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3004, "sent_len_1": 66.8178, "sent_len_max_0": 128.0, "sent_len_max_1": 192.2413, "stdk": 0.0476, "stdq": 0.044, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.2673, "doc_norm": 1.4145, "encoder_q-embeddings": 1535.6786, "encoder_q-layer.0": 1049.8181, "encoder_q-layer.1": 1197.816, "encoder_q-layer.10": 1199.703, "encoder_q-layer.11": 2650.1946, "encoder_q-layer.2": 1409.047, "encoder_q-layer.3": 1446.7914, "encoder_q-layer.4": 1473.8668, "encoder_q-layer.5": 1342.8914, "encoder_q-layer.6": 1293.2142, "encoder_q-layer.7": 1328.3219, "encoder_q-layer.8": 1445.7086, "encoder_q-layer.9": 1221.1786, "epoch": 0.4, "inbatch_neg_score": 0.2504, "inbatch_pos_score": 0.9097, "learning_rate": 3.2944444444444445e-05, "loss": 3.2673, "norm_diff": 0.0301, "norm_loss": 0.0, "num_token_doc": 66.8413, "num_token_overlap": 15.836, "num_token_query": 42.2621, "num_token_union": 68.4818, "num_word_context": 202.2003, "num_word_doc": 49.8753, "num_word_query": 31.9217, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2206.6547, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2502, "query_norm": 1.3844, "queue_k_norm": 1.4159, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2621, "sent_len_1": 66.8413, "sent_len_max_0": 127.995, "sent_len_max_1": 190.7312, "stdk": 0.0479, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.2766, "doc_norm": 1.4197, "encoder_q-embeddings": 1150.7301, "encoder_q-layer.0": 759.3898, "encoder_q-layer.1": 790.0955, "encoder_q-layer.10": 1255.9337, "encoder_q-layer.11": 2726.7107, "encoder_q-layer.2": 857.7599, "encoder_q-layer.3": 867.5684, "encoder_q-layer.4": 902.7336, "encoder_q-layer.5": 959.2528, "encoder_q-layer.6": 982.7471, "encoder_q-layer.7": 1075.949, "encoder_q-layer.8": 1282.5693, "encoder_q-layer.9": 1123.9554, "epoch": 0.4, "inbatch_neg_score": 0.2428, "inbatch_pos_score": 0.8657, "learning_rate": 3.2888888888888894e-05, "loss": 3.2766, "norm_diff": 0.0805, "norm_loss": 0.0, "num_token_doc": 67.0432, "num_token_overlap": 15.8217, "num_token_query": 42.3615, "num_token_union": 68.6277, "num_word_context": 202.727, "num_word_doc": 50.0153, "num_word_query": 32.007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1856.3848, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2424, "query_norm": 1.3391, "queue_k_norm": 1.4146, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3615, "sent_len_1": 67.0432, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.295, "stdk": 0.0481, "stdq": 0.0432, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2634, "doc_norm": 1.4138, "encoder_q-embeddings": 1857.8955, "encoder_q-layer.0": 1308.6532, "encoder_q-layer.1": 1342.0935, "encoder_q-layer.10": 1165.7561, "encoder_q-layer.11": 2713.6157, "encoder_q-layer.2": 1537.1555, "encoder_q-layer.3": 1617.9073, "encoder_q-layer.4": 1687.6473, "encoder_q-layer.5": 1600.5244, "encoder_q-layer.6": 1538.6215, "encoder_q-layer.7": 1618.5775, "encoder_q-layer.8": 1715.4778, "encoder_q-layer.9": 1278.5579, "epoch": 0.4, "inbatch_neg_score": 0.2368, "inbatch_pos_score": 0.8779, "learning_rate": 3.283333333333333e-05, "loss": 3.2634, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.7364, "num_token_overlap": 15.8804, "num_token_query": 42.3959, "num_token_union": 68.4524, "num_word_context": 202.2517, "num_word_doc": 49.7933, "num_word_query": 32.0396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2486.4886, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2363, "query_norm": 1.3904, "queue_k_norm": 1.4175, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3959, "sent_len_1": 66.7364, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.9563, "stdk": 0.0478, "stdq": 0.0454, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.2624, "doc_norm": 1.4141, "encoder_q-embeddings": 1857.5703, "encoder_q-layer.0": 1124.6021, "encoder_q-layer.1": 1233.4384, "encoder_q-layer.10": 1145.6295, "encoder_q-layer.11": 2678.561, "encoder_q-layer.2": 1310.4883, "encoder_q-layer.3": 1317.5336, "encoder_q-layer.4": 1260.4812, "encoder_q-layer.5": 1167.4153, "encoder_q-layer.6": 1258.8953, "encoder_q-layer.7": 1414.7073, "encoder_q-layer.8": 1433.9373, "encoder_q-layer.9": 1164.5128, "epoch": 0.4, "inbatch_neg_score": 0.2398, "inbatch_pos_score": 0.873, "learning_rate": 3.277777777777778e-05, "loss": 3.2624, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.7038, "num_token_overlap": 15.8504, "num_token_query": 42.4455, "num_token_union": 68.4723, "num_word_context": 202.2051, "num_word_doc": 49.7588, "num_word_query": 32.0643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2255.6574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2382, "query_norm": 1.3536, "queue_k_norm": 1.4147, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4455, "sent_len_1": 66.7038, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.78, "stdk": 0.0478, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.271, "doc_norm": 1.4118, "encoder_q-embeddings": 1648.3279, "encoder_q-layer.0": 1097.7803, "encoder_q-layer.1": 1218.3591, "encoder_q-layer.10": 1212.8591, "encoder_q-layer.11": 2587.7168, "encoder_q-layer.2": 1342.3699, "encoder_q-layer.3": 1443.9586, "encoder_q-layer.4": 1466.4463, "encoder_q-layer.5": 1514.4319, "encoder_q-layer.6": 1516.4053, "encoder_q-layer.7": 1637.2949, "encoder_q-layer.8": 1652.3861, "encoder_q-layer.9": 1270.2329, "epoch": 0.4, "inbatch_neg_score": 0.2372, "inbatch_pos_score": 0.8965, "learning_rate": 3.272222222222223e-05, "loss": 3.271, "norm_diff": 0.0429, "norm_loss": 0.0, "num_token_doc": 66.5271, "num_token_overlap": 15.8073, "num_token_query": 42.5121, "num_token_union": 68.4447, "num_word_context": 202.1755, "num_word_doc": 49.5854, "num_word_query": 32.1271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2295.9938, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2367, "query_norm": 1.369, "queue_k_norm": 1.4137, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5121, "sent_len_1": 66.5271, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.0275, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2619, "doc_norm": 1.413, "encoder_q-embeddings": 1172.6442, "encoder_q-layer.0": 774.2244, "encoder_q-layer.1": 818.1365, "encoder_q-layer.10": 1287.6411, "encoder_q-layer.11": 2623.0212, "encoder_q-layer.2": 919.8304, "encoder_q-layer.3": 918.0482, "encoder_q-layer.4": 920.5115, "encoder_q-layer.5": 896.9671, "encoder_q-layer.6": 960.0252, "encoder_q-layer.7": 1046.3829, "encoder_q-layer.8": 1255.7972, "encoder_q-layer.9": 1141.0266, "epoch": 0.4, "inbatch_neg_score": 0.231, "inbatch_pos_score": 0.897, "learning_rate": 3.266666666666667e-05, "loss": 3.2619, "norm_diff": 0.0478, "norm_loss": 0.0, "num_token_doc": 66.75, "num_token_overlap": 15.827, "num_token_query": 42.3079, "num_token_union": 68.4468, "num_word_context": 202.1475, "num_word_doc": 49.7963, "num_word_query": 31.9451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1820.5755, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2308, "query_norm": 1.3651, "queue_k_norm": 1.4164, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3079, "sent_len_1": 66.75, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.6625, "stdk": 0.0478, "stdq": 0.0449, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.2841, "doc_norm": 1.4178, "encoder_q-embeddings": 1019.0679, "encoder_q-layer.0": 696.2786, "encoder_q-layer.1": 736.926, "encoder_q-layer.10": 1272.8228, "encoder_q-layer.11": 2855.26, "encoder_q-layer.2": 812.9317, "encoder_q-layer.3": 851.9354, "encoder_q-layer.4": 914.2203, "encoder_q-layer.5": 943.8035, "encoder_q-layer.6": 1072.22, "encoder_q-layer.7": 1216.3607, "encoder_q-layer.8": 1365.7021, "encoder_q-layer.9": 1227.5913, "epoch": 0.4, "inbatch_neg_score": 0.2465, "inbatch_pos_score": 0.8809, "learning_rate": 3.261111111111111e-05, "loss": 3.2841, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.8227, "num_token_overlap": 15.8414, "num_token_query": 42.3103, "num_token_union": 68.5268, "num_word_context": 202.358, "num_word_doc": 49.8749, "num_word_query": 31.9521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1919.8708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2461, "query_norm": 1.3784, "queue_k_norm": 1.416, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3103, "sent_len_1": 66.8227, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8487, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2373, "doc_norm": 1.4116, "encoder_q-embeddings": 5129.4053, "encoder_q-layer.0": 3720.615, "encoder_q-layer.1": 3991.8123, "encoder_q-layer.10": 1162.0823, "encoder_q-layer.11": 2719.9243, "encoder_q-layer.2": 4780.729, "encoder_q-layer.3": 5187.6787, "encoder_q-layer.4": 5604.3828, "encoder_q-layer.5": 4719.9604, "encoder_q-layer.6": 4534.0186, "encoder_q-layer.7": 4187.3774, "encoder_q-layer.8": 3507.8662, "encoder_q-layer.9": 1927.1068, "epoch": 0.4, "inbatch_neg_score": 0.2384, "inbatch_pos_score": 0.8906, "learning_rate": 3.2555555555555555e-05, "loss": 3.2373, "norm_diff": 0.0396, "norm_loss": 0.0, "num_token_doc": 67.0362, "num_token_overlap": 15.8796, "num_token_query": 42.4719, "num_token_union": 68.7439, "num_word_context": 202.8307, "num_word_doc": 50.0256, "num_word_query": 32.1198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6324.381, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2386, "query_norm": 1.372, "queue_k_norm": 1.4158, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4719, "sent_len_1": 67.0362, "sent_len_max_0": 128.0, "sent_len_max_1": 191.085, "stdk": 0.0478, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.28, "doc_norm": 1.414, "encoder_q-embeddings": 1050.1501, "encoder_q-layer.0": 715.8137, "encoder_q-layer.1": 775.2733, "encoder_q-layer.10": 1264.3428, "encoder_q-layer.11": 2802.6643, "encoder_q-layer.2": 844.3239, "encoder_q-layer.3": 879.5166, "encoder_q-layer.4": 930.5971, "encoder_q-layer.5": 972.2988, "encoder_q-layer.6": 1116.5388, "encoder_q-layer.7": 1181.0421, "encoder_q-layer.8": 1373.5281, "encoder_q-layer.9": 1218.766, "epoch": 0.41, "inbatch_neg_score": 0.2314, "inbatch_pos_score": 0.8672, "learning_rate": 3.2500000000000004e-05, "loss": 3.28, "norm_diff": 0.0624, "norm_loss": 0.0, "num_token_doc": 66.8176, "num_token_overlap": 15.7951, "num_token_query": 42.2557, "num_token_union": 68.4912, "num_word_context": 202.5167, "num_word_doc": 49.8758, "num_word_query": 31.9132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1874.1499, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2317, "query_norm": 1.3516, "queue_k_norm": 1.4165, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2557, "sent_len_1": 66.8176, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.0913, "stdk": 0.0479, "stdq": 0.0442, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.2571, "doc_norm": 1.412, "encoder_q-embeddings": 2104.5715, "encoder_q-layer.0": 1545.875, "encoder_q-layer.1": 1764.0909, "encoder_q-layer.10": 1213.2385, "encoder_q-layer.11": 2918.5034, "encoder_q-layer.2": 1903.5909, "encoder_q-layer.3": 1912.8336, "encoder_q-layer.4": 1903.9257, "encoder_q-layer.5": 1905.4763, "encoder_q-layer.6": 1880.6906, "encoder_q-layer.7": 1753.1533, "encoder_q-layer.8": 1764.5743, "encoder_q-layer.9": 1307.8171, "epoch": 0.41, "inbatch_neg_score": 0.2342, "inbatch_pos_score": 0.8965, "learning_rate": 3.2444444444444446e-05, "loss": 3.2571, "norm_diff": 0.0439, "norm_loss": 0.0, "num_token_doc": 66.8601, "num_token_overlap": 15.883, "num_token_query": 42.421, "num_token_union": 68.4819, "num_word_context": 202.3456, "num_word_doc": 49.8815, "num_word_query": 32.0455, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2839.6663, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2346, "query_norm": 1.3681, "queue_k_norm": 1.4157, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.421, "sent_len_1": 66.8601, "sent_len_max_0": 128.0, "sent_len_max_1": 190.645, "stdk": 0.0478, "stdq": 0.0449, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2419, "doc_norm": 1.4183, "encoder_q-embeddings": 1245.5624, "encoder_q-layer.0": 812.7909, "encoder_q-layer.1": 853.4486, "encoder_q-layer.10": 1241.3768, "encoder_q-layer.11": 2667.2983, "encoder_q-layer.2": 1003.4958, "encoder_q-layer.3": 1027.3513, "encoder_q-layer.4": 1092.2659, "encoder_q-layer.5": 1161.7115, "encoder_q-layer.6": 1195.9357, "encoder_q-layer.7": 1225.2751, "encoder_q-layer.8": 1370.0011, "encoder_q-layer.9": 1150.0781, "epoch": 0.41, "inbatch_neg_score": 0.2343, "inbatch_pos_score": 0.895, "learning_rate": 3.238888888888889e-05, "loss": 3.2419, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.475, "num_token_overlap": 15.7813, "num_token_query": 42.1349, "num_token_union": 68.2148, "num_word_context": 201.9378, "num_word_doc": 49.6201, "num_word_query": 31.8051, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1935.3207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2352, "query_norm": 1.3671, "queue_k_norm": 1.4149, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1349, "sent_len_1": 66.475, "sent_len_max_0": 128.0, "sent_len_max_1": 189.46, "stdk": 0.048, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2682, "doc_norm": 1.4131, "encoder_q-embeddings": 1254.4321, "encoder_q-layer.0": 832.9797, "encoder_q-layer.1": 868.3522, "encoder_q-layer.10": 1350.7809, "encoder_q-layer.11": 2762.8872, "encoder_q-layer.2": 934.907, "encoder_q-layer.3": 971.2714, "encoder_q-layer.4": 976.034, "encoder_q-layer.5": 1026.3599, "encoder_q-layer.6": 1094.6465, "encoder_q-layer.7": 1247.1719, "encoder_q-layer.8": 1360.0608, "encoder_q-layer.9": 1214.937, "epoch": 0.41, "inbatch_neg_score": 0.2362, "inbatch_pos_score": 0.8765, "learning_rate": 3.233333333333333e-05, "loss": 3.2682, "norm_diff": 0.0515, "norm_loss": 0.0, "num_token_doc": 66.6472, "num_token_overlap": 15.7486, "num_token_query": 42.2607, "num_token_union": 68.4449, "num_word_context": 202.3291, "num_word_doc": 49.7039, "num_word_query": 31.9139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1967.0852, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2361, "query_norm": 1.3616, "queue_k_norm": 1.414, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2607, "sent_len_1": 66.6472, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.3475, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.2589, "doc_norm": 1.4191, "encoder_q-embeddings": 1403.1222, "encoder_q-layer.0": 920.5551, "encoder_q-layer.1": 1031.129, "encoder_q-layer.10": 1214.1439, "encoder_q-layer.11": 2576.8108, "encoder_q-layer.2": 1170.174, "encoder_q-layer.3": 1237.9127, "encoder_q-layer.4": 1191.4445, "encoder_q-layer.5": 1202.1807, "encoder_q-layer.6": 1297.2908, "encoder_q-layer.7": 1293.248, "encoder_q-layer.8": 1376.3385, "encoder_q-layer.9": 1136.4058, "epoch": 0.41, "inbatch_neg_score": 0.2306, "inbatch_pos_score": 0.8848, "learning_rate": 3.227777777777778e-05, "loss": 3.2589, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.6227, "num_token_overlap": 15.8917, "num_token_query": 42.5784, "num_token_union": 68.4715, "num_word_context": 202.443, "num_word_doc": 49.7099, "num_word_query": 32.1654, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2047.6548, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2299, "query_norm": 1.3656, "queue_k_norm": 1.4155, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5784, "sent_len_1": 66.6227, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1012, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2778, "doc_norm": 1.4088, "encoder_q-embeddings": 1371.7461, "encoder_q-layer.0": 909.7863, "encoder_q-layer.1": 1041.2759, "encoder_q-layer.10": 1139.8199, "encoder_q-layer.11": 2614.6121, "encoder_q-layer.2": 1170.2837, "encoder_q-layer.3": 1252.2039, "encoder_q-layer.4": 1294.7399, "encoder_q-layer.5": 1331.8345, "encoder_q-layer.6": 1429.0289, "encoder_q-layer.7": 1289.3588, "encoder_q-layer.8": 1298.4412, "encoder_q-layer.9": 1122.3627, "epoch": 0.41, "inbatch_neg_score": 0.2218, "inbatch_pos_score": 0.9014, "learning_rate": 3.222222222222223e-05, "loss": 3.2778, "norm_diff": 0.0525, "norm_loss": 0.0, "num_token_doc": 66.6523, "num_token_overlap": 15.7971, "num_token_query": 42.2638, "num_token_union": 68.404, "num_word_context": 202.0219, "num_word_doc": 49.7179, "num_word_query": 31.9198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2057.2584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2216, "query_norm": 1.3563, "queue_k_norm": 1.4151, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2638, "sent_len_1": 66.6523, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6887, "stdk": 0.0477, "stdq": 0.0445, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.2452, "doc_norm": 1.4158, "encoder_q-embeddings": 1639.3491, "encoder_q-layer.0": 1058.9452, "encoder_q-layer.1": 1170.2727, "encoder_q-layer.10": 1221.8038, "encoder_q-layer.11": 2719.2549, "encoder_q-layer.2": 1305.3042, "encoder_q-layer.3": 1367.7466, "encoder_q-layer.4": 1411.9834, "encoder_q-layer.5": 1518.6423, "encoder_q-layer.6": 1488.3647, "encoder_q-layer.7": 1460.5172, "encoder_q-layer.8": 1477.9492, "encoder_q-layer.9": 1226.4681, "epoch": 0.41, "inbatch_neg_score": 0.2357, "inbatch_pos_score": 0.9219, "learning_rate": 3.2166666666666665e-05, "loss": 3.2452, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.8949, "num_token_overlap": 15.8203, "num_token_query": 42.2362, "num_token_union": 68.4615, "num_word_context": 202.5446, "num_word_doc": 49.8969, "num_word_query": 31.918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2294.1408, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2362, "query_norm": 1.4034, "queue_k_norm": 1.4169, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2362, "sent_len_1": 66.8949, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.8688, "stdk": 0.0479, "stdq": 0.0459, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.264, "doc_norm": 1.4096, "encoder_q-embeddings": 3055.9124, "encoder_q-layer.0": 2141.8872, "encoder_q-layer.1": 2397.4731, "encoder_q-layer.10": 1207.553, "encoder_q-layer.11": 2756.9441, "encoder_q-layer.2": 2774.8584, "encoder_q-layer.3": 2923.5208, "encoder_q-layer.4": 2621.4629, "encoder_q-layer.5": 2866.759, "encoder_q-layer.6": 2519.3623, "encoder_q-layer.7": 2165.1804, "encoder_q-layer.8": 2094.8914, "encoder_q-layer.9": 1251.291, "epoch": 0.41, "inbatch_neg_score": 0.2238, "inbatch_pos_score": 0.8486, "learning_rate": 3.2111111111111114e-05, "loss": 3.264, "norm_diff": 0.0499, "norm_loss": 0.0, "num_token_doc": 66.7743, "num_token_overlap": 15.8673, "num_token_query": 42.4574, "num_token_union": 68.5209, "num_word_context": 202.0441, "num_word_doc": 49.8406, "num_word_query": 32.0682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3683.8284, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2229, "query_norm": 1.3597, "queue_k_norm": 1.4155, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4574, "sent_len_1": 66.7743, "sent_len_max_0": 127.995, "sent_len_max_1": 189.2113, "stdk": 0.0477, "stdq": 0.0444, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2591, "doc_norm": 1.4151, "encoder_q-embeddings": 1215.7725, "encoder_q-layer.0": 793.9407, "encoder_q-layer.1": 869.111, "encoder_q-layer.10": 1314.2749, "encoder_q-layer.11": 2583.8599, "encoder_q-layer.2": 1007.9028, "encoder_q-layer.3": 1041.7262, "encoder_q-layer.4": 1079.6042, "encoder_q-layer.5": 1105.6074, "encoder_q-layer.6": 1270.97, "encoder_q-layer.7": 1264.2842, "encoder_q-layer.8": 1430.5852, "encoder_q-layer.9": 1206.672, "epoch": 0.41, "inbatch_neg_score": 0.2327, "inbatch_pos_score": 0.9062, "learning_rate": 3.2055555555555556e-05, "loss": 3.2591, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 66.5272, "num_token_overlap": 15.857, "num_token_query": 42.3664, "num_token_union": 68.3246, "num_word_context": 201.9497, "num_word_doc": 49.6624, "num_word_query": 32.0027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1921.0616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2345, "query_norm": 1.3958, "queue_k_norm": 1.4147, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3664, "sent_len_1": 66.5272, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.6562, "stdk": 0.048, "stdq": 0.0453, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2561, "doc_norm": 1.4125, "encoder_q-embeddings": 1642.8635, "encoder_q-layer.0": 1093.4812, "encoder_q-layer.1": 1152.7448, "encoder_q-layer.10": 1169.168, "encoder_q-layer.11": 2530.8972, "encoder_q-layer.2": 1278.479, "encoder_q-layer.3": 1337.0714, "encoder_q-layer.4": 1420.4047, "encoder_q-layer.5": 1455.7345, "encoder_q-layer.6": 1470.8212, "encoder_q-layer.7": 1501.8845, "encoder_q-layer.8": 1547.5145, "encoder_q-layer.9": 1176.627, "epoch": 0.41, "inbatch_neg_score": 0.2349, "inbatch_pos_score": 0.9014, "learning_rate": 3.2000000000000005e-05, "loss": 3.2561, "norm_diff": 0.0131, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 15.8502, "num_token_query": 42.5099, "num_token_union": 68.6116, "num_word_context": 202.3792, "num_word_doc": 49.8208, "num_word_query": 32.1284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2224.2564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2363, "query_norm": 1.4023, "queue_k_norm": 1.4163, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5099, "sent_len_1": 66.8218, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.1637, "stdk": 0.0479, "stdq": 0.0453, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2486, "doc_norm": 1.4184, "encoder_q-embeddings": 1059.9574, "encoder_q-layer.0": 703.3945, "encoder_q-layer.1": 715.8928, "encoder_q-layer.10": 1230.1711, "encoder_q-layer.11": 2660.4458, "encoder_q-layer.2": 814.7705, "encoder_q-layer.3": 830.1844, "encoder_q-layer.4": 883.3318, "encoder_q-layer.5": 896.9728, "encoder_q-layer.6": 994.8971, "encoder_q-layer.7": 1093.4423, "encoder_q-layer.8": 1315.3625, "encoder_q-layer.9": 1151.6766, "epoch": 0.41, "inbatch_neg_score": 0.2339, "inbatch_pos_score": 0.8916, "learning_rate": 3.194444444444444e-05, "loss": 3.2486, "norm_diff": 0.0195, "norm_loss": 0.0, "num_token_doc": 66.6443, "num_token_overlap": 15.8573, "num_token_query": 42.3749, "num_token_union": 68.4194, "num_word_context": 202.0829, "num_word_doc": 49.7203, "num_word_query": 32.0124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1793.534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2336, "query_norm": 1.4001, "queue_k_norm": 1.4156, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3749, "sent_len_1": 66.6443, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4363, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2441, "doc_norm": 1.4203, "encoder_q-embeddings": 1532.1837, "encoder_q-layer.0": 990.5981, "encoder_q-layer.1": 1049.6243, "encoder_q-layer.10": 1226.8862, "encoder_q-layer.11": 2601.3623, "encoder_q-layer.2": 1204.8988, "encoder_q-layer.3": 1265.1405, "encoder_q-layer.4": 1249.4559, "encoder_q-layer.5": 1283.4043, "encoder_q-layer.6": 1376.0616, "encoder_q-layer.7": 1565.0844, "encoder_q-layer.8": 1448.5085, "encoder_q-layer.9": 1182.5405, "epoch": 0.42, "inbatch_neg_score": 0.232, "inbatch_pos_score": 0.9209, "learning_rate": 3.188888888888889e-05, "loss": 3.2441, "norm_diff": 0.0097, "norm_loss": 0.0, "num_token_doc": 67.0046, "num_token_overlap": 15.854, "num_token_query": 42.43, "num_token_union": 68.7, "num_word_context": 202.6953, "num_word_doc": 50.0308, "num_word_query": 32.0533, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2148.2316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2327, "query_norm": 1.4117, "queue_k_norm": 1.4161, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.43, "sent_len_1": 67.0046, "sent_len_max_0": 127.99, "sent_len_max_1": 188.5962, "stdk": 0.0482, "stdq": 0.0456, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.263, "doc_norm": 1.4133, "encoder_q-embeddings": 6340.6631, "encoder_q-layer.0": 4489.5073, "encoder_q-layer.1": 5192.8579, "encoder_q-layer.10": 2573.1699, "encoder_q-layer.11": 5199.1772, "encoder_q-layer.2": 5902.0103, "encoder_q-layer.3": 6075.457, "encoder_q-layer.4": 5906.6851, "encoder_q-layer.5": 6058.9565, "encoder_q-layer.6": 5743.002, "encoder_q-layer.7": 4817.7183, "encoder_q-layer.8": 4259.1968, "encoder_q-layer.9": 2734.4531, "epoch": 0.42, "inbatch_neg_score": 0.238, "inbatch_pos_score": 0.9053, "learning_rate": 3.183333333333334e-05, "loss": 3.263, "norm_diff": 0.0093, "norm_loss": 0.0, "num_token_doc": 66.5863, "num_token_overlap": 15.7889, "num_token_query": 42.244, "num_token_union": 68.3114, "num_word_context": 201.9535, "num_word_doc": 49.6277, "num_word_query": 31.8841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7732.2059, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2378, "query_norm": 1.4105, "queue_k_norm": 1.4163, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.244, "sent_len_1": 66.5863, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4325, "stdk": 0.0478, "stdq": 0.0457, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 50.4883, "active_queue_size": 16384.0, "cl_loss": 3.2558, "doc_norm": 1.4112, "encoder_q-embeddings": 4611.5371, "encoder_q-layer.0": 3278.7734, "encoder_q-layer.1": 3682.6909, "encoder_q-layer.10": 2609.45, "encoder_q-layer.11": 5695.0029, "encoder_q-layer.2": 4157.7573, "encoder_q-layer.3": 4704.7842, "encoder_q-layer.4": 4847.0376, "encoder_q-layer.5": 5120.4468, "encoder_q-layer.6": 5186.5088, "encoder_q-layer.7": 4322.0293, "encoder_q-layer.8": 4247.6758, "encoder_q-layer.9": 3040.8938, "epoch": 0.42, "inbatch_neg_score": 0.2412, "inbatch_pos_score": 0.8569, "learning_rate": 3.177777777777778e-05, "loss": 3.2558, "norm_diff": 0.0404, "norm_loss": 0.0, "num_token_doc": 66.8357, "num_token_overlap": 15.7806, "num_token_query": 42.2903, "num_token_union": 68.5139, "num_word_context": 202.428, "num_word_doc": 49.8635, "num_word_query": 31.9364, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6476.5566, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2419, "query_norm": 1.3708, "queue_k_norm": 1.4156, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2903, "sent_len_1": 66.8357, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8688, "stdk": 0.0478, "stdq": 0.0442, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.2457, "doc_norm": 1.4175, "encoder_q-embeddings": 2510.9414, "encoder_q-layer.0": 1624.1045, "encoder_q-layer.1": 1801.2173, "encoder_q-layer.10": 2204.688, "encoder_q-layer.11": 5346.2754, "encoder_q-layer.2": 2038.3726, "encoder_q-layer.3": 2173.1016, "encoder_q-layer.4": 2235.4287, "encoder_q-layer.5": 2286.1377, "encoder_q-layer.6": 2444.6201, "encoder_q-layer.7": 2578.8311, "encoder_q-layer.8": 2623.7271, "encoder_q-layer.9": 2285.8721, "epoch": 0.42, "inbatch_neg_score": 0.2479, "inbatch_pos_score": 0.8994, "learning_rate": 3.1722222222222224e-05, "loss": 3.2457, "norm_diff": 0.0621, "norm_loss": 0.0, "num_token_doc": 66.6756, "num_token_overlap": 15.8053, "num_token_query": 42.2417, "num_token_union": 68.3881, "num_word_context": 202.0633, "num_word_doc": 49.7521, "num_word_query": 31.8991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3905.4608, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2479, "query_norm": 1.3554, "queue_k_norm": 1.417, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2417, "sent_len_1": 66.6756, "sent_len_max_0": 127.995, "sent_len_max_1": 188.7012, "stdk": 0.048, "stdq": 0.0437, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.2174, "doc_norm": 1.4213, "encoder_q-embeddings": 3771.8354, "encoder_q-layer.0": 2398.0503, "encoder_q-layer.1": 2526.5061, "encoder_q-layer.10": 2467.6975, "encoder_q-layer.11": 5450.7651, "encoder_q-layer.2": 2865.0173, "encoder_q-layer.3": 2954.4097, "encoder_q-layer.4": 3061.8882, "encoder_q-layer.5": 3198.2095, "encoder_q-layer.6": 3294.1299, "encoder_q-layer.7": 3356.6025, "encoder_q-layer.8": 3122.7229, "encoder_q-layer.9": 2564.446, "epoch": 0.42, "inbatch_neg_score": 0.2484, "inbatch_pos_score": 0.9043, "learning_rate": 3.1666666666666666e-05, "loss": 3.2174, "norm_diff": 0.0542, "norm_loss": 0.0, "num_token_doc": 67.0523, "num_token_overlap": 15.8802, "num_token_query": 42.2282, "num_token_union": 68.5023, "num_word_context": 202.3075, "num_word_doc": 49.988, "num_word_query": 31.8742, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4881.9746, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.249, "query_norm": 1.3671, "queue_k_norm": 1.4167, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2282, "sent_len_1": 67.0523, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.6962, "stdk": 0.0481, "stdq": 0.0443, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2185, "doc_norm": 1.4163, "encoder_q-embeddings": 3443.2571, "encoder_q-layer.0": 2221.7009, "encoder_q-layer.1": 2432.2029, "encoder_q-layer.10": 2518.3369, "encoder_q-layer.11": 5386.7534, "encoder_q-layer.2": 3052.4458, "encoder_q-layer.3": 3112.5708, "encoder_q-layer.4": 3306.8215, "encoder_q-layer.5": 3218.5967, "encoder_q-layer.6": 3237.2852, "encoder_q-layer.7": 3339.7737, "encoder_q-layer.8": 3585.6064, "encoder_q-layer.9": 2576.499, "epoch": 0.42, "inbatch_neg_score": 0.2535, "inbatch_pos_score": 0.9116, "learning_rate": 3.1611111111111115e-05, "loss": 3.2185, "norm_diff": 0.0315, "norm_loss": 0.0, "num_token_doc": 66.9083, "num_token_overlap": 15.8633, "num_token_query": 42.4551, "num_token_union": 68.5863, "num_word_context": 202.2021, "num_word_doc": 49.9055, "num_word_query": 32.074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4899.8577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2529, "query_norm": 1.3848, "queue_k_norm": 1.4179, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4551, "sent_len_1": 66.9083, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.8162, "stdk": 0.0479, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2443, "doc_norm": 1.4203, "encoder_q-embeddings": 2537.8728, "encoder_q-layer.0": 1689.7546, "encoder_q-layer.1": 1882.1476, "encoder_q-layer.10": 2333.5754, "encoder_q-layer.11": 5711.7603, "encoder_q-layer.2": 2126.7754, "encoder_q-layer.3": 2139.1843, "encoder_q-layer.4": 2218.2883, "encoder_q-layer.5": 2317.7334, "encoder_q-layer.6": 2432.2354, "encoder_q-layer.7": 2642.3005, "encoder_q-layer.8": 2821.4324, "encoder_q-layer.9": 2503.8289, "epoch": 0.42, "inbatch_neg_score": 0.2493, "inbatch_pos_score": 0.9141, "learning_rate": 3.155555555555556e-05, "loss": 3.2443, "norm_diff": 0.0347, "norm_loss": 0.0, "num_token_doc": 66.9317, "num_token_overlap": 15.8529, "num_token_query": 42.4995, "num_token_union": 68.6157, "num_word_context": 202.4427, "num_word_doc": 49.9335, "num_word_query": 32.1253, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4164.3073, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2477, "query_norm": 1.3856, "queue_k_norm": 1.4169, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4995, "sent_len_1": 66.9317, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.2175, "stdk": 0.048, "stdq": 0.0451, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.2498, "doc_norm": 1.4119, "encoder_q-embeddings": 3250.4602, "encoder_q-layer.0": 2133.4844, "encoder_q-layer.1": 2344.1575, "encoder_q-layer.10": 2491.1553, "encoder_q-layer.11": 5574.7759, "encoder_q-layer.2": 2841.4192, "encoder_q-layer.3": 3016.4746, "encoder_q-layer.4": 3248.6892, "encoder_q-layer.5": 3229.2932, "encoder_q-layer.6": 3022.3809, "encoder_q-layer.7": 2978.4111, "encoder_q-layer.8": 3110.1187, "encoder_q-layer.9": 2545.2827, "epoch": 0.42, "inbatch_neg_score": 0.2525, "inbatch_pos_score": 0.9028, "learning_rate": 3.15e-05, "loss": 3.2498, "norm_diff": 0.0342, "norm_loss": 0.0, "num_token_doc": 66.936, "num_token_overlap": 15.8935, "num_token_query": 42.4879, "num_token_union": 68.6328, "num_word_context": 202.2267, "num_word_doc": 49.9815, "num_word_query": 32.0897, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4718.7433, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.252, "query_norm": 1.3777, "queue_k_norm": 1.4172, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4879, "sent_len_1": 66.936, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.0075, "stdk": 0.0477, "stdq": 0.0447, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.2377, "doc_norm": 1.4187, "encoder_q-embeddings": 2517.446, "encoder_q-layer.0": 1576.2791, "encoder_q-layer.1": 1751.6034, "encoder_q-layer.10": 2347.4758, "encoder_q-layer.11": 5502.8726, "encoder_q-layer.2": 1974.277, "encoder_q-layer.3": 2176.1555, "encoder_q-layer.4": 2268.3652, "encoder_q-layer.5": 2288.5544, "encoder_q-layer.6": 2347.0154, "encoder_q-layer.7": 2614.0654, "encoder_q-layer.8": 3032.5139, "encoder_q-layer.9": 2447.5732, "epoch": 0.42, "inbatch_neg_score": 0.2408, "inbatch_pos_score": 0.9058, "learning_rate": 3.144444444444445e-05, "loss": 3.2377, "norm_diff": 0.0481, "norm_loss": 0.0, "num_token_doc": 66.9035, "num_token_overlap": 15.8647, "num_token_query": 42.3704, "num_token_union": 68.5718, "num_word_context": 202.3589, "num_word_doc": 49.9497, "num_word_query": 32.0048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4067.9673, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2393, "query_norm": 1.3706, "queue_k_norm": 1.4187, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3704, "sent_len_1": 66.9035, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.2738, "stdk": 0.048, "stdq": 0.0449, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.2445, "doc_norm": 1.4106, "encoder_q-embeddings": 2710.0759, "encoder_q-layer.0": 1817.4921, "encoder_q-layer.1": 1974.1821, "encoder_q-layer.10": 2617.2161, "encoder_q-layer.11": 5670.7695, "encoder_q-layer.2": 2190.1174, "encoder_q-layer.3": 2350.7356, "encoder_q-layer.4": 2479.6858, "encoder_q-layer.5": 2445.9919, "encoder_q-layer.6": 2497.8318, "encoder_q-layer.7": 2788.6384, "encoder_q-layer.8": 2971.96, "encoder_q-layer.9": 2535.5095, "epoch": 0.42, "inbatch_neg_score": 0.244, "inbatch_pos_score": 0.8765, "learning_rate": 3.138888888888889e-05, "loss": 3.2445, "norm_diff": 0.0473, "norm_loss": 0.0, "num_token_doc": 66.8062, "num_token_overlap": 15.7801, "num_token_query": 42.1807, "num_token_union": 68.4804, "num_word_context": 202.4167, "num_word_doc": 49.8743, "num_word_query": 31.8508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4231.3728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2439, "query_norm": 1.3649, "queue_k_norm": 1.4197, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1807, "sent_len_1": 66.8062, "sent_len_max_0": 127.99, "sent_len_max_1": 189.5788, "stdk": 0.0477, "stdq": 0.0446, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2552, "doc_norm": 1.421, "encoder_q-embeddings": 3229.4883, "encoder_q-layer.0": 2223.9934, "encoder_q-layer.1": 2468.646, "encoder_q-layer.10": 2435.7478, "encoder_q-layer.11": 5483.0713, "encoder_q-layer.2": 2790.3516, "encoder_q-layer.3": 2929.8701, "encoder_q-layer.4": 3191.8777, "encoder_q-layer.5": 2811.8818, "encoder_q-layer.6": 3042.2627, "encoder_q-layer.7": 2645.5674, "encoder_q-layer.8": 2932.668, "encoder_q-layer.9": 2512.3062, "epoch": 0.43, "inbatch_neg_score": 0.2427, "inbatch_pos_score": 0.9131, "learning_rate": 3.1333333333333334e-05, "loss": 3.2552, "norm_diff": 0.0449, "norm_loss": 0.0, "num_token_doc": 66.4332, "num_token_overlap": 15.7322, "num_token_query": 42.2302, "num_token_union": 68.3086, "num_word_context": 201.7714, "num_word_doc": 49.5878, "num_word_query": 31.913, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4627.5236, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.243, "query_norm": 1.3761, "queue_k_norm": 1.4173, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2302, "sent_len_1": 66.4332, "sent_len_max_0": 128.0, "sent_len_max_1": 187.61, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2521, "doc_norm": 1.4138, "encoder_q-embeddings": 2193.6282, "encoder_q-layer.0": 1406.6903, "encoder_q-layer.1": 1499.2039, "encoder_q-layer.10": 2492.5063, "encoder_q-layer.11": 5578.165, "encoder_q-layer.2": 1671.3507, "encoder_q-layer.3": 1681.4604, "encoder_q-layer.4": 1732.3651, "encoder_q-layer.5": 1901.6923, "encoder_q-layer.6": 2016.2252, "encoder_q-layer.7": 2286.9294, "encoder_q-layer.8": 2514.7493, "encoder_q-layer.9": 2340.6616, "epoch": 0.43, "inbatch_neg_score": 0.2434, "inbatch_pos_score": 0.8818, "learning_rate": 3.1277777777777776e-05, "loss": 3.2521, "norm_diff": 0.0425, "norm_loss": 0.0, "num_token_doc": 66.7473, "num_token_overlap": 15.8436, "num_token_query": 42.3557, "num_token_union": 68.4119, "num_word_context": 202.4951, "num_word_doc": 49.8148, "num_word_query": 32.0017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3762.4053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2427, "query_norm": 1.3713, "queue_k_norm": 1.4192, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3557, "sent_len_1": 66.7473, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0737, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.2329, "doc_norm": 1.4134, "encoder_q-embeddings": 2463.728, "encoder_q-layer.0": 1640.4351, "encoder_q-layer.1": 1773.4152, "encoder_q-layer.10": 2507.3589, "encoder_q-layer.11": 5697.4106, "encoder_q-layer.2": 1991.0901, "encoder_q-layer.3": 1977.7915, "encoder_q-layer.4": 2037.8348, "encoder_q-layer.5": 2011.5183, "encoder_q-layer.6": 2247.4336, "encoder_q-layer.7": 2484.3533, "encoder_q-layer.8": 2943.0334, "encoder_q-layer.9": 2547.5925, "epoch": 0.43, "inbatch_neg_score": 0.2419, "inbatch_pos_score": 0.8882, "learning_rate": 3.1222222222222225e-05, "loss": 3.2329, "norm_diff": 0.0595, "norm_loss": 0.0, "num_token_doc": 66.7735, "num_token_overlap": 15.7903, "num_token_query": 42.2476, "num_token_union": 68.4848, "num_word_context": 202.1657, "num_word_doc": 49.8114, "num_word_query": 31.9245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4082.7621, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2411, "query_norm": 1.3539, "queue_k_norm": 1.4194, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2476, "sent_len_1": 66.7735, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.5613, "stdk": 0.0478, "stdq": 0.0441, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.2409, "doc_norm": 1.4206, "encoder_q-embeddings": 2233.8328, "encoder_q-layer.0": 1514.6924, "encoder_q-layer.1": 1578.1403, "encoder_q-layer.10": 2530.3032, "encoder_q-layer.11": 5279.9302, "encoder_q-layer.2": 1766.0979, "encoder_q-layer.3": 1798.7554, "encoder_q-layer.4": 1843.5427, "encoder_q-layer.5": 1854.5333, "encoder_q-layer.6": 2033.3579, "encoder_q-layer.7": 2326.5635, "encoder_q-layer.8": 2563.8972, "encoder_q-layer.9": 2360.0796, "epoch": 0.43, "inbatch_neg_score": 0.2412, "inbatch_pos_score": 0.9258, "learning_rate": 3.116666666666667e-05, "loss": 3.2409, "norm_diff": 0.0411, "norm_loss": 0.0, "num_token_doc": 66.6539, "num_token_overlap": 15.8558, "num_token_query": 42.3966, "num_token_union": 68.427, "num_word_context": 202.2914, "num_word_doc": 49.7431, "num_word_query": 32.0199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3726.1369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2422, "query_norm": 1.3837, "queue_k_norm": 1.4203, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3966, "sent_len_1": 66.6539, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.5263, "stdk": 0.048, "stdq": 0.0452, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.2371, "doc_norm": 1.4204, "encoder_q-embeddings": 1988.8608, "encoder_q-layer.0": 1280.0082, "encoder_q-layer.1": 1331.8452, "encoder_q-layer.10": 2392.0281, "encoder_q-layer.11": 5220.3696, "encoder_q-layer.2": 1506.5258, "encoder_q-layer.3": 1507.6646, "encoder_q-layer.4": 1620.0227, "encoder_q-layer.5": 1678.5077, "encoder_q-layer.6": 1760.5396, "encoder_q-layer.7": 2010.2637, "encoder_q-layer.8": 2343.856, "encoder_q-layer.9": 2266.5339, "epoch": 0.43, "inbatch_neg_score": 0.2433, "inbatch_pos_score": 0.8867, "learning_rate": 3.111111111111111e-05, "loss": 3.2371, "norm_diff": 0.0544, "norm_loss": 0.0, "num_token_doc": 66.8085, "num_token_overlap": 15.852, "num_token_query": 42.3557, "num_token_union": 68.4998, "num_word_context": 202.1823, "num_word_doc": 49.8214, "num_word_query": 31.9992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3444.1367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2415, "query_norm": 1.366, "queue_k_norm": 1.4204, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3557, "sent_len_1": 66.8085, "sent_len_max_0": 127.995, "sent_len_max_1": 191.6463, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.2396, "doc_norm": 1.4215, "encoder_q-embeddings": 4650.3237, "encoder_q-layer.0": 3359.6946, "encoder_q-layer.1": 3437.2681, "encoder_q-layer.10": 2724.5701, "encoder_q-layer.11": 5842.7148, "encoder_q-layer.2": 3626.2957, "encoder_q-layer.3": 3795.4756, "encoder_q-layer.4": 3391.9631, "encoder_q-layer.5": 3041.3364, "encoder_q-layer.6": 3035.9941, "encoder_q-layer.7": 2983.5417, "encoder_q-layer.8": 3187.3591, "encoder_q-layer.9": 2621.4382, "epoch": 0.43, "inbatch_neg_score": 0.2403, "inbatch_pos_score": 0.8848, "learning_rate": 3.105555555555555e-05, "loss": 3.2396, "norm_diff": 0.0578, "norm_loss": 0.0, "num_token_doc": 66.5698, "num_token_overlap": 15.7551, "num_token_query": 42.1695, "num_token_union": 68.3087, "num_word_context": 202.0605, "num_word_doc": 49.6611, "num_word_query": 31.8477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5514.3528, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2396, "query_norm": 1.3637, "queue_k_norm": 1.4198, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1695, "sent_len_1": 66.5698, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.6712, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 49.8047, "active_queue_size": 16384.0, "cl_loss": 3.2343, "doc_norm": 1.4197, "encoder_q-embeddings": 2578.9663, "encoder_q-layer.0": 1653.5197, "encoder_q-layer.1": 1779.022, "encoder_q-layer.10": 2650.9001, "encoder_q-layer.11": 5872.5225, "encoder_q-layer.2": 2137.6843, "encoder_q-layer.3": 2215.0867, "encoder_q-layer.4": 2475.3958, "encoder_q-layer.5": 2689.8979, "encoder_q-layer.6": 2554.2356, "encoder_q-layer.7": 2598.071, "encoder_q-layer.8": 2802.6948, "encoder_q-layer.9": 2520.8506, "epoch": 0.43, "inbatch_neg_score": 0.24, "inbatch_pos_score": 0.8833, "learning_rate": 3.1e-05, "loss": 3.2343, "norm_diff": 0.0527, "norm_loss": 0.0, "num_token_doc": 66.6319, "num_token_overlap": 15.8505, "num_token_query": 42.3668, "num_token_union": 68.3879, "num_word_context": 202.1852, "num_word_doc": 49.703, "num_word_query": 31.9947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4256.4749, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2383, "query_norm": 1.367, "queue_k_norm": 1.4189, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3668, "sent_len_1": 66.6319, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.9338, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.2474, "doc_norm": 1.4194, "encoder_q-embeddings": 2571.3057, "encoder_q-layer.0": 1777.1812, "encoder_q-layer.1": 1878.1069, "encoder_q-layer.10": 2615.0498, "encoder_q-layer.11": 5397.2114, "encoder_q-layer.2": 2267.5, "encoder_q-layer.3": 2342.7581, "encoder_q-layer.4": 2447.2048, "encoder_q-layer.5": 2546.4805, "encoder_q-layer.6": 2504.4905, "encoder_q-layer.7": 2604.9888, "encoder_q-layer.8": 2997.6987, "encoder_q-layer.9": 2496.6865, "epoch": 0.43, "inbatch_neg_score": 0.2421, "inbatch_pos_score": 0.9102, "learning_rate": 3.094444444444445e-05, "loss": 3.2474, "norm_diff": 0.0273, "norm_loss": 0.0, "num_token_doc": 66.4579, "num_token_overlap": 15.7199, "num_token_query": 42.1605, "num_token_union": 68.294, "num_word_context": 202.0786, "num_word_doc": 49.5684, "num_word_query": 31.8428, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4120.5361, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2423, "query_norm": 1.392, "queue_k_norm": 1.419, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1605, "sent_len_1": 66.4579, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3825, "stdk": 0.048, "stdq": 0.0457, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.2275, "doc_norm": 1.419, "encoder_q-embeddings": 12511.4893, "encoder_q-layer.0": 8866.0098, "encoder_q-layer.1": 8699.2148, "encoder_q-layer.10": 2429.0771, "encoder_q-layer.11": 5495.3467, "encoder_q-layer.2": 8876.9521, "encoder_q-layer.3": 8924.8525, "encoder_q-layer.4": 8752.1748, "encoder_q-layer.5": 7621.6113, "encoder_q-layer.6": 8195.7441, "encoder_q-layer.7": 7691.8496, "encoder_q-layer.8": 6875.2988, "encoder_q-layer.9": 3433.6807, "epoch": 0.43, "inbatch_neg_score": 0.2422, "inbatch_pos_score": 0.9209, "learning_rate": 3.088888888888889e-05, "loss": 3.2275, "norm_diff": 0.0363, "norm_loss": 0.0, "num_token_doc": 66.9875, "num_token_overlap": 15.8988, "num_token_query": 42.481, "num_token_union": 68.6307, "num_word_context": 202.4938, "num_word_doc": 50.0117, "num_word_query": 32.1199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12523.4028, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2408, "query_norm": 1.3827, "queue_k_norm": 1.4217, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.481, "sent_len_1": 66.9875, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.325, "stdk": 0.0479, "stdq": 0.0455, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.2371, "doc_norm": 1.4222, "encoder_q-embeddings": 2496.3887, "encoder_q-layer.0": 1699.9707, "encoder_q-layer.1": 1851.6039, "encoder_q-layer.10": 2461.9937, "encoder_q-layer.11": 5694.0186, "encoder_q-layer.2": 2054.9888, "encoder_q-layer.3": 2208.7588, "encoder_q-layer.4": 2395.0239, "encoder_q-layer.5": 2399.1863, "encoder_q-layer.6": 2373.5261, "encoder_q-layer.7": 2554.8354, "encoder_q-layer.8": 2757.1409, "encoder_q-layer.9": 2544.3203, "epoch": 0.43, "inbatch_neg_score": 0.2448, "inbatch_pos_score": 0.9155, "learning_rate": 3.0833333333333335e-05, "loss": 3.2371, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.982, "num_token_overlap": 15.9384, "num_token_query": 42.4371, "num_token_union": 68.568, "num_word_context": 202.4689, "num_word_doc": 49.9711, "num_word_query": 32.0692, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4106.3904, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2452, "query_norm": 1.3861, "queue_k_norm": 1.4208, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4371, "sent_len_1": 66.982, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9462, "stdk": 0.0481, "stdq": 0.0453, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.2143, "doc_norm": 1.4133, "encoder_q-embeddings": 6734.2178, "encoder_q-layer.0": 4748.0986, "encoder_q-layer.1": 5795.9077, "encoder_q-layer.10": 2469.6204, "encoder_q-layer.11": 5293.5757, "encoder_q-layer.2": 7291.1924, "encoder_q-layer.3": 8260.2979, "encoder_q-layer.4": 8994.8125, "encoder_q-layer.5": 9284.6221, "encoder_q-layer.6": 8378.2051, "encoder_q-layer.7": 7819.9829, "encoder_q-layer.8": 7154.7324, "encoder_q-layer.9": 3730.9492, "epoch": 0.44, "inbatch_neg_score": 0.2487, "inbatch_pos_score": 0.9136, "learning_rate": 3.077777777777778e-05, "loss": 3.2143, "norm_diff": 0.0308, "norm_loss": 0.0, "num_token_doc": 66.8427, "num_token_overlap": 15.8803, "num_token_query": 42.4864, "num_token_union": 68.6264, "num_word_context": 202.3929, "num_word_doc": 49.9308, "num_word_query": 32.0938, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10081.8806, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2476, "query_norm": 1.3825, "queue_k_norm": 1.42, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4864, "sent_len_1": 66.8427, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.3787, "stdk": 0.0478, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2237, "doc_norm": 1.4149, "encoder_q-embeddings": 6484.8711, "encoder_q-layer.0": 4574.5415, "encoder_q-layer.1": 5006.6045, "encoder_q-layer.10": 4922.0356, "encoder_q-layer.11": 11032.1035, "encoder_q-layer.2": 5601.8599, "encoder_q-layer.3": 6372.106, "encoder_q-layer.4": 6816.4624, "encoder_q-layer.5": 6354.5996, "encoder_q-layer.6": 6246.6709, "encoder_q-layer.7": 5721.2461, "encoder_q-layer.8": 6147.2974, "encoder_q-layer.9": 4940.6523, "epoch": 0.44, "inbatch_neg_score": 0.2442, "inbatch_pos_score": 0.8931, "learning_rate": 3.0722222222222227e-05, "loss": 3.2237, "norm_diff": 0.0476, "norm_loss": 0.0, "num_token_doc": 66.9477, "num_token_overlap": 15.9054, "num_token_query": 42.5834, "num_token_union": 68.6906, "num_word_context": 202.3814, "num_word_doc": 50.0142, "num_word_query": 32.1624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9536.958, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2433, "query_norm": 1.3672, "queue_k_norm": 1.4201, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5834, "sent_len_1": 66.9477, "sent_len_max_0": 127.99, "sent_len_max_1": 189.6075, "stdk": 0.0478, "stdq": 0.0444, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2141, "doc_norm": 1.4201, "encoder_q-embeddings": 5371.6714, "encoder_q-layer.0": 3741.03, "encoder_q-layer.1": 4042.3308, "encoder_q-layer.10": 4520.5674, "encoder_q-layer.11": 10220.9336, "encoder_q-layer.2": 4552.1924, "encoder_q-layer.3": 4544.0161, "encoder_q-layer.4": 4737.7979, "encoder_q-layer.5": 4946.3408, "encoder_q-layer.6": 4701.9243, "encoder_q-layer.7": 4703.2529, "encoder_q-layer.8": 5319.6426, "encoder_q-layer.9": 4701.8853, "epoch": 0.44, "inbatch_neg_score": 0.2458, "inbatch_pos_score": 0.8945, "learning_rate": 3.066666666666667e-05, "loss": 3.2141, "norm_diff": 0.0497, "norm_loss": 0.0, "num_token_doc": 66.7102, "num_token_overlap": 15.8166, "num_token_query": 42.3224, "num_token_union": 68.4751, "num_word_context": 202.2229, "num_word_doc": 49.8138, "num_word_query": 31.9856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7963.5238, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2435, "query_norm": 1.3704, "queue_k_norm": 1.4219, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3224, "sent_len_1": 66.7102, "sent_len_max_0": 128.0, "sent_len_max_1": 187.7, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2358, "doc_norm": 1.4163, "encoder_q-embeddings": 5993.418, "encoder_q-layer.0": 3934.2534, "encoder_q-layer.1": 4207.0337, "encoder_q-layer.10": 4849.8052, "encoder_q-layer.11": 10651.043, "encoder_q-layer.2": 4821.2793, "encoder_q-layer.3": 4864.7402, "encoder_q-layer.4": 4875.3643, "encoder_q-layer.5": 4868.0879, "encoder_q-layer.6": 4838.936, "encoder_q-layer.7": 5388.1592, "encoder_q-layer.8": 5415.3584, "encoder_q-layer.9": 4786.0874, "epoch": 0.44, "inbatch_neg_score": 0.2549, "inbatch_pos_score": 0.8877, "learning_rate": 3.061111111111111e-05, "loss": 3.2358, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.7319, "num_token_overlap": 15.7717, "num_token_query": 42.2128, "num_token_union": 68.4304, "num_word_context": 202.1181, "num_word_doc": 49.7739, "num_word_query": 31.8889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8292.2435, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2542, "query_norm": 1.3551, "queue_k_norm": 1.4211, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2128, "sent_len_1": 66.7319, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.0263, "stdk": 0.0478, "stdq": 0.0437, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.1988, "doc_norm": 1.4213, "encoder_q-embeddings": 18132.7617, "encoder_q-layer.0": 12106.1016, "encoder_q-layer.1": 11310.5977, "encoder_q-layer.10": 5227.9971, "encoder_q-layer.11": 11009.4971, "encoder_q-layer.2": 13828.9434, "encoder_q-layer.3": 15294.917, "encoder_q-layer.4": 15431.4385, "encoder_q-layer.5": 17427.4766, "encoder_q-layer.6": 13712.1963, "encoder_q-layer.7": 13009.3291, "encoder_q-layer.8": 13244.7607, "encoder_q-layer.9": 7428.9321, "epoch": 0.44, "inbatch_neg_score": 0.2475, "inbatch_pos_score": 0.9253, "learning_rate": 3.055555555555556e-05, "loss": 3.1988, "norm_diff": 0.0217, "norm_loss": 0.0, "num_token_doc": 66.768, "num_token_overlap": 15.9695, "num_token_query": 42.585, "num_token_union": 68.5594, "num_word_context": 202.4344, "num_word_doc": 49.8427, "num_word_query": 32.1524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20730.0943, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.2485, "query_norm": 1.3996, "queue_k_norm": 1.4221, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.585, "sent_len_1": 66.768, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.4487, "stdk": 0.0481, "stdq": 0.0457, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.2106, "doc_norm": 1.4221, "encoder_q-embeddings": 4928.2212, "encoder_q-layer.0": 3053.8992, "encoder_q-layer.1": 3237.6501, "encoder_q-layer.10": 4838.6758, "encoder_q-layer.11": 10630.7344, "encoder_q-layer.2": 3653.9763, "encoder_q-layer.3": 3731.0046, "encoder_q-layer.4": 3965.4253, "encoder_q-layer.5": 4008.416, "encoder_q-layer.6": 4289.4849, "encoder_q-layer.7": 4689.7148, "encoder_q-layer.8": 5429.8032, "encoder_q-layer.9": 4806.8794, "epoch": 0.44, "inbatch_neg_score": 0.2539, "inbatch_pos_score": 0.9072, "learning_rate": 3.05e-05, "loss": 3.2106, "norm_diff": 0.0372, "norm_loss": 0.0, "num_token_doc": 66.87, "num_token_overlap": 15.9008, "num_token_query": 42.3158, "num_token_union": 68.4463, "num_word_context": 201.9003, "num_word_doc": 49.8686, "num_word_query": 31.9922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7628.5801, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2527, "query_norm": 1.387, "queue_k_norm": 1.4232, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3158, "sent_len_1": 66.87, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.6463, "stdk": 0.0481, "stdq": 0.0449, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.2276, "doc_norm": 1.4281, "encoder_q-embeddings": 6269.3018, "encoder_q-layer.0": 4174.0928, "encoder_q-layer.1": 4739.0508, "encoder_q-layer.10": 4916.123, "encoder_q-layer.11": 11392.7295, "encoder_q-layer.2": 5452.6997, "encoder_q-layer.3": 5583.0698, "encoder_q-layer.4": 5682.6191, "encoder_q-layer.5": 5301.8208, "encoder_q-layer.6": 6070.8716, "encoder_q-layer.7": 6146.4888, "encoder_q-layer.8": 6306.7007, "encoder_q-layer.9": 5319.4492, "epoch": 0.44, "inbatch_neg_score": 0.2427, "inbatch_pos_score": 0.8872, "learning_rate": 3.044444444444445e-05, "loss": 3.2276, "norm_diff": 0.0569, "norm_loss": 0.0, "num_token_doc": 66.9067, "num_token_overlap": 15.7712, "num_token_query": 42.1555, "num_token_union": 68.539, "num_word_context": 202.5176, "num_word_doc": 50.0017, "num_word_query": 31.8389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9212.5826, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2433, "query_norm": 1.3712, "queue_k_norm": 1.4232, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1555, "sent_len_1": 66.9067, "sent_len_max_0": 128.0, "sent_len_max_1": 188.01, "stdk": 0.0483, "stdq": 0.0446, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.2111, "doc_norm": 1.4198, "encoder_q-embeddings": 4518.9219, "encoder_q-layer.0": 2852.1423, "encoder_q-layer.1": 3016.4595, "encoder_q-layer.10": 5004.3047, "encoder_q-layer.11": 11003.1592, "encoder_q-layer.2": 3355.6462, "encoder_q-layer.3": 3613.0107, "encoder_q-layer.4": 3678.6948, "encoder_q-layer.5": 3758.5496, "encoder_q-layer.6": 4187.6094, "encoder_q-layer.7": 4987.0068, "encoder_q-layer.8": 5284.1895, "encoder_q-layer.9": 4624.0317, "epoch": 0.44, "inbatch_neg_score": 0.2466, "inbatch_pos_score": 0.9277, "learning_rate": 3.0388888888888887e-05, "loss": 3.2111, "norm_diff": 0.0274, "norm_loss": 0.0, "num_token_doc": 66.774, "num_token_overlap": 15.88, "num_token_query": 42.4345, "num_token_union": 68.5704, "num_word_context": 202.5719, "num_word_doc": 49.8869, "num_word_query": 32.0852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7565.7484, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2487, "query_norm": 1.3924, "queue_k_norm": 1.4238, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4345, "sent_len_1": 66.774, "sent_len_max_0": 127.995, "sent_len_max_1": 187.1113, "stdk": 0.048, "stdq": 0.0453, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2026, "doc_norm": 1.4178, "encoder_q-embeddings": 7670.4673, "encoder_q-layer.0": 5737.3037, "encoder_q-layer.1": 5989.4092, "encoder_q-layer.10": 4780.271, "encoder_q-layer.11": 10809.5352, "encoder_q-layer.2": 7012.6411, "encoder_q-layer.3": 7054.5215, "encoder_q-layer.4": 7015.5376, "encoder_q-layer.5": 6488.6265, "encoder_q-layer.6": 6431.9722, "encoder_q-layer.7": 5720.7275, "encoder_q-layer.8": 5574.4116, "encoder_q-layer.9": 4894.6851, "epoch": 0.44, "inbatch_neg_score": 0.2593, "inbatch_pos_score": 0.917, "learning_rate": 3.0333333333333337e-05, "loss": 3.2026, "norm_diff": 0.0384, "norm_loss": 0.0, "num_token_doc": 66.8394, "num_token_overlap": 15.8838, "num_token_query": 42.3686, "num_token_union": 68.5024, "num_word_context": 202.3862, "num_word_doc": 49.8865, "num_word_query": 31.972, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10042.9646, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2581, "query_norm": 1.3793, "queue_k_norm": 1.4232, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3686, "sent_len_1": 66.8394, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.34, "stdk": 0.0478, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.227, "doc_norm": 1.4212, "encoder_q-embeddings": 6563.8281, "encoder_q-layer.0": 4378.3721, "encoder_q-layer.1": 5190.2124, "encoder_q-layer.10": 4506.416, "encoder_q-layer.11": 9986.0488, "encoder_q-layer.2": 6121.6875, "encoder_q-layer.3": 6296.2441, "encoder_q-layer.4": 5955.0898, "encoder_q-layer.5": 5657.3818, "encoder_q-layer.6": 6172.3203, "encoder_q-layer.7": 6092.9053, "encoder_q-layer.8": 5808.7588, "encoder_q-layer.9": 4613.0254, "epoch": 0.44, "inbatch_neg_score": 0.2627, "inbatch_pos_score": 0.9419, "learning_rate": 3.0277777777777776e-05, "loss": 3.227, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.8001, "num_token_overlap": 15.8627, "num_token_query": 42.3355, "num_token_union": 68.4853, "num_word_context": 202.1884, "num_word_doc": 49.8187, "num_word_query": 31.9801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9127.5937, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2634, "query_norm": 1.3887, "queue_k_norm": 1.4225, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3355, "sent_len_1": 66.8001, "sent_len_max_0": 127.9712, "sent_len_max_1": 190.9025, "stdk": 0.048, "stdq": 0.0451, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.2274, "doc_norm": 1.4212, "encoder_q-embeddings": 6250.311, "encoder_q-layer.0": 4381.4302, "encoder_q-layer.1": 5153.8823, "encoder_q-layer.10": 2416.061, "encoder_q-layer.11": 5331.0464, "encoder_q-layer.2": 6324.4717, "encoder_q-layer.3": 6788.5049, "encoder_q-layer.4": 6993.792, "encoder_q-layer.5": 6498.1968, "encoder_q-layer.6": 6481.0991, "encoder_q-layer.7": 6172.0015, "encoder_q-layer.8": 4985.2046, "encoder_q-layer.9": 3047.3706, "epoch": 0.45, "inbatch_neg_score": 0.2673, "inbatch_pos_score": 0.9253, "learning_rate": 3.0222222222222225e-05, "loss": 3.2274, "norm_diff": 0.0448, "norm_loss": 0.0, "num_token_doc": 66.8973, "num_token_overlap": 15.8553, "num_token_query": 42.3706, "num_token_union": 68.5496, "num_word_context": 202.3736, "num_word_doc": 49.9338, "num_word_query": 32.0238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8415.2283, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2676, "query_norm": 1.3764, "queue_k_norm": 1.4242, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3706, "sent_len_1": 66.8973, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.2887, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2196, "doc_norm": 1.421, "encoder_q-embeddings": 3448.0645, "encoder_q-layer.0": 2468.0623, "encoder_q-layer.1": 2879.5469, "encoder_q-layer.10": 1185.1581, "encoder_q-layer.11": 2749.7368, "encoder_q-layer.2": 3293.8694, "encoder_q-layer.3": 3405.5305, "encoder_q-layer.4": 3692.9348, "encoder_q-layer.5": 3468.325, "encoder_q-layer.6": 4026.5132, "encoder_q-layer.7": 3388.8276, "encoder_q-layer.8": 1714.2593, "encoder_q-layer.9": 1241.0779, "epoch": 0.45, "inbatch_neg_score": 0.2806, "inbatch_pos_score": 0.9404, "learning_rate": 3.016666666666667e-05, "loss": 3.2196, "norm_diff": 0.0314, "norm_loss": 0.0, "num_token_doc": 66.6041, "num_token_overlap": 15.83, "num_token_query": 42.2821, "num_token_union": 68.3286, "num_word_context": 202.238, "num_word_doc": 49.696, "num_word_query": 31.9385, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4427.288, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2803, "query_norm": 1.3896, "queue_k_norm": 1.4244, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2821, "sent_len_1": 66.6041, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6975, "stdk": 0.0479, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.2034, "doc_norm": 1.4225, "encoder_q-embeddings": 4145.0088, "encoder_q-layer.0": 3108.0398, "encoder_q-layer.1": 3398.3699, "encoder_q-layer.10": 1347.1149, "encoder_q-layer.11": 2906.3652, "encoder_q-layer.2": 4276.8848, "encoder_q-layer.3": 4460.1392, "encoder_q-layer.4": 4918.2891, "encoder_q-layer.5": 5711.1484, "encoder_q-layer.6": 4640.3335, "encoder_q-layer.7": 3248.4585, "encoder_q-layer.8": 2047.6976, "encoder_q-layer.9": 1281.6577, "epoch": 0.45, "inbatch_neg_score": 0.2858, "inbatch_pos_score": 0.9214, "learning_rate": 3.0111111111111113e-05, "loss": 3.2034, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 67.0221, "num_token_overlap": 15.858, "num_token_query": 42.3029, "num_token_union": 68.5623, "num_word_context": 202.2939, "num_word_doc": 50.0458, "num_word_query": 31.9482, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5512.7276, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2837, "query_norm": 1.3778, "queue_k_norm": 1.4272, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3029, "sent_len_1": 67.0221, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.84, "stdk": 0.0479, "stdq": 0.0442, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.2144, "doc_norm": 1.4286, "encoder_q-embeddings": 10188.5713, "encoder_q-layer.0": 7037.5127, "encoder_q-layer.1": 7198.1382, "encoder_q-layer.10": 1434.7095, "encoder_q-layer.11": 2938.3835, "encoder_q-layer.2": 9034.5117, "encoder_q-layer.3": 9473.0068, "encoder_q-layer.4": 9931.1338, "encoder_q-layer.5": 9391.4375, "encoder_q-layer.6": 8725.5332, "encoder_q-layer.7": 7448.6157, "encoder_q-layer.8": 4954.0859, "encoder_q-layer.9": 2213.8875, "epoch": 0.45, "inbatch_neg_score": 0.292, "inbatch_pos_score": 0.9546, "learning_rate": 3.005555555555556e-05, "loss": 3.2144, "norm_diff": 0.0459, "norm_loss": 0.0, "num_token_doc": 66.7668, "num_token_overlap": 15.861, "num_token_query": 42.4489, "num_token_union": 68.569, "num_word_context": 202.3165, "num_word_doc": 49.821, "num_word_query": 32.0695, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11582.9774, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2917, "query_norm": 1.3828, "queue_k_norm": 1.4267, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4489, "sent_len_1": 66.7668, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.605, "stdk": 0.0481, "stdq": 0.0443, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 49.2188, "active_queue_size": 16384.0, "cl_loss": 3.217, "doc_norm": 1.4237, "encoder_q-embeddings": 1418.3473, "encoder_q-layer.0": 976.2391, "encoder_q-layer.1": 984.2333, "encoder_q-layer.10": 1268.4503, "encoder_q-layer.11": 2780.4526, "encoder_q-layer.2": 1092.2546, "encoder_q-layer.3": 1092.111, "encoder_q-layer.4": 1187.8276, "encoder_q-layer.5": 1199.946, "encoder_q-layer.6": 1249.7814, "encoder_q-layer.7": 1290.5648, "encoder_q-layer.8": 1400.167, "encoder_q-layer.9": 1251.8175, "epoch": 0.45, "inbatch_neg_score": 0.2959, "inbatch_pos_score": 0.9102, "learning_rate": 3e-05, "loss": 3.217, "norm_diff": 0.0377, "norm_loss": 0.0, "num_token_doc": 66.8498, "num_token_overlap": 15.7735, "num_token_query": 42.2591, "num_token_union": 68.4852, "num_word_context": 202.3287, "num_word_doc": 49.8978, "num_word_query": 31.9303, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2116.9011, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3859, "queue_k_norm": 1.4297, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2591, "sent_len_1": 66.8498, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.4288, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2249, "doc_norm": 1.429, "encoder_q-embeddings": 7008.6963, "encoder_q-layer.0": 5148.9878, "encoder_q-layer.1": 6501.4629, "encoder_q-layer.10": 1217.3864, "encoder_q-layer.11": 2693.2454, "encoder_q-layer.2": 7246.7896, "encoder_q-layer.3": 7678.0786, "encoder_q-layer.4": 8420.7891, "encoder_q-layer.5": 8960.5723, "encoder_q-layer.6": 8401.3809, "encoder_q-layer.7": 8015.1328, "encoder_q-layer.8": 3135.9895, "encoder_q-layer.9": 1425.5365, "epoch": 0.45, "inbatch_neg_score": 0.2972, "inbatch_pos_score": 0.9463, "learning_rate": 2.9944444444444446e-05, "loss": 3.2249, "norm_diff": 0.0403, "norm_loss": 0.0, "num_token_doc": 66.8977, "num_token_overlap": 15.7912, "num_token_query": 42.3024, "num_token_union": 68.592, "num_word_context": 202.3195, "num_word_doc": 49.9338, "num_word_query": 31.9604, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9432.5288, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2959, "query_norm": 1.3886, "queue_k_norm": 1.432, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3024, "sent_len_1": 66.8977, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.3487, "stdk": 0.048, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.2062, "doc_norm": 1.4227, "encoder_q-embeddings": 1913.2098, "encoder_q-layer.0": 1343.7429, "encoder_q-layer.1": 1525.6804, "encoder_q-layer.10": 1309.0181, "encoder_q-layer.11": 2625.6716, "encoder_q-layer.2": 1819.2838, "encoder_q-layer.3": 1906.7069, "encoder_q-layer.4": 2084.8301, "encoder_q-layer.5": 2123.1382, "encoder_q-layer.6": 2055.1267, "encoder_q-layer.7": 2180.6213, "encoder_q-layer.8": 2022.6036, "encoder_q-layer.9": 1354.4454, "epoch": 0.45, "inbatch_neg_score": 0.2867, "inbatch_pos_score": 0.9307, "learning_rate": 2.988888888888889e-05, "loss": 3.2062, "norm_diff": 0.05, "norm_loss": 0.0, "num_token_doc": 66.8519, "num_token_overlap": 15.8094, "num_token_query": 42.2318, "num_token_union": 68.4997, "num_word_context": 202.3785, "num_word_doc": 49.9075, "num_word_query": 31.8977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2785.7764, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.3727, "queue_k_norm": 1.4318, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2318, "sent_len_1": 66.8519, "sent_len_max_0": 127.985, "sent_len_max_1": 188.9325, "stdk": 0.0478, "stdq": 0.0441, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.209, "doc_norm": 1.4378, "encoder_q-embeddings": 2485.1191, "encoder_q-layer.0": 1599.3201, "encoder_q-layer.1": 1838.1709, "encoder_q-layer.10": 1383.166, "encoder_q-layer.11": 2711.0032, "encoder_q-layer.2": 2129.5874, "encoder_q-layer.3": 2338.7439, "encoder_q-layer.4": 2656.0544, "encoder_q-layer.5": 3230.9888, "encoder_q-layer.6": 3928.7007, "encoder_q-layer.7": 3922.1277, "encoder_q-layer.8": 2490.7593, "encoder_q-layer.9": 1305.9293, "epoch": 0.45, "inbatch_neg_score": 0.2837, "inbatch_pos_score": 0.9644, "learning_rate": 2.9833333333333335e-05, "loss": 3.209, "norm_diff": 0.0393, "norm_loss": 0.0, "num_token_doc": 66.8562, "num_token_overlap": 15.8445, "num_token_query": 42.4038, "num_token_union": 68.5706, "num_word_context": 202.3134, "num_word_doc": 49.8504, "num_word_query": 32.0409, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3764.5824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2839, "query_norm": 1.3985, "queue_k_norm": 1.4314, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4038, "sent_len_1": 66.8562, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7, "stdk": 0.0483, "stdq": 0.0453, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.2181, "doc_norm": 1.4305, "encoder_q-embeddings": 2740.8909, "encoder_q-layer.0": 1948.9648, "encoder_q-layer.1": 2032.1483, "encoder_q-layer.10": 1147.4333, "encoder_q-layer.11": 2624.2615, "encoder_q-layer.2": 2314.5166, "encoder_q-layer.3": 2437.0471, "encoder_q-layer.4": 2605.4534, "encoder_q-layer.5": 2462.2017, "encoder_q-layer.6": 2561.2456, "encoder_q-layer.7": 2017.9611, "encoder_q-layer.8": 1517.4924, "encoder_q-layer.9": 1193.4552, "epoch": 0.45, "inbatch_neg_score": 0.2887, "inbatch_pos_score": 0.9556, "learning_rate": 2.9777777777777777e-05, "loss": 3.2181, "norm_diff": 0.0192, "norm_loss": 0.0, "num_token_doc": 66.8679, "num_token_overlap": 15.8619, "num_token_query": 42.3234, "num_token_union": 68.5853, "num_word_context": 202.2737, "num_word_doc": 49.898, "num_word_query": 31.9661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3347.1301, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2883, "query_norm": 1.4142, "queue_k_norm": 1.4326, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3234, "sent_len_1": 66.8679, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8487, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2373, "doc_norm": 1.4266, "encoder_q-embeddings": 1049.897, "encoder_q-layer.0": 667.3515, "encoder_q-layer.1": 698.8749, "encoder_q-layer.10": 1237.4349, "encoder_q-layer.11": 2742.9912, "encoder_q-layer.2": 763.6588, "encoder_q-layer.3": 791.7159, "encoder_q-layer.4": 817.8383, "encoder_q-layer.5": 854.408, "encoder_q-layer.6": 964.7963, "encoder_q-layer.7": 1056.5386, "encoder_q-layer.8": 1240.4366, "encoder_q-layer.9": 1151.6691, "epoch": 0.45, "inbatch_neg_score": 0.2796, "inbatch_pos_score": 0.9336, "learning_rate": 2.9722222222222223e-05, "loss": 3.2373, "norm_diff": 0.0413, "norm_loss": 0.0, "num_token_doc": 66.5434, "num_token_overlap": 15.7864, "num_token_query": 42.313, "num_token_union": 68.3882, "num_word_context": 201.9507, "num_word_doc": 49.6417, "num_word_query": 31.9655, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1794.5393, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2795, "query_norm": 1.3853, "queue_k_norm": 1.4317, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.313, "sent_len_1": 66.5434, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.5238, "stdk": 0.0478, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.2064, "doc_norm": 1.4325, "encoder_q-embeddings": 1216.4695, "encoder_q-layer.0": 803.384, "encoder_q-layer.1": 866.524, "encoder_q-layer.10": 1226.881, "encoder_q-layer.11": 2672.6917, "encoder_q-layer.2": 995.2968, "encoder_q-layer.3": 1035.0884, "encoder_q-layer.4": 1086.0964, "encoder_q-layer.5": 1112.8201, "encoder_q-layer.6": 1215.452, "encoder_q-layer.7": 1239.2357, "encoder_q-layer.8": 1305.6431, "encoder_q-layer.9": 1205.3862, "epoch": 0.45, "inbatch_neg_score": 0.277, "inbatch_pos_score": 0.9502, "learning_rate": 2.9666666666666672e-05, "loss": 3.2064, "norm_diff": 0.0436, "norm_loss": 0.0, "num_token_doc": 66.8238, "num_token_overlap": 15.8473, "num_token_query": 42.3271, "num_token_union": 68.4846, "num_word_context": 202.3341, "num_word_doc": 49.8852, "num_word_query": 31.9631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1928.154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2766, "query_norm": 1.3888, "queue_k_norm": 1.4333, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3271, "sent_len_1": 66.8238, "sent_len_max_0": 128.0, "sent_len_max_1": 187.3512, "stdk": 0.0481, "stdq": 0.0451, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.2135, "doc_norm": 1.4344, "encoder_q-embeddings": 1094.0076, "encoder_q-layer.0": 739.7809, "encoder_q-layer.1": 801.3401, "encoder_q-layer.10": 1116.8611, "encoder_q-layer.11": 2556.0222, "encoder_q-layer.2": 921.6498, "encoder_q-layer.3": 988.0548, "encoder_q-layer.4": 1100.0854, "encoder_q-layer.5": 1113.7356, "encoder_q-layer.6": 1088.8722, "encoder_q-layer.7": 1125.3462, "encoder_q-layer.8": 1251.67, "encoder_q-layer.9": 1119.6703, "epoch": 0.46, "inbatch_neg_score": 0.2759, "inbatch_pos_score": 0.9438, "learning_rate": 2.961111111111111e-05, "loss": 3.2135, "norm_diff": 0.0639, "norm_loss": 0.0, "num_token_doc": 66.8878, "num_token_overlap": 15.8622, "num_token_query": 42.5146, "num_token_union": 68.6365, "num_word_context": 202.454, "num_word_doc": 49.9172, "num_word_query": 32.1142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1837.1235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2766, "query_norm": 1.3706, "queue_k_norm": 1.4332, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5146, "sent_len_1": 66.8878, "sent_len_max_0": 127.99, "sent_len_max_1": 189.4963, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1929, "doc_norm": 1.4295, "encoder_q-embeddings": 8412.0127, "encoder_q-layer.0": 5651.9634, "encoder_q-layer.1": 5852.8403, "encoder_q-layer.10": 1504.5076, "encoder_q-layer.11": 2859.8501, "encoder_q-layer.2": 6941.3672, "encoder_q-layer.3": 7056.3105, "encoder_q-layer.4": 8355.7266, "encoder_q-layer.5": 7521.8306, "encoder_q-layer.6": 5383.4043, "encoder_q-layer.7": 5241.479, "encoder_q-layer.8": 4613.4678, "encoder_q-layer.9": 2477.1819, "epoch": 0.46, "inbatch_neg_score": 0.2794, "inbatch_pos_score": 0.9463, "learning_rate": 2.955555555555556e-05, "loss": 3.1929, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.8468, "num_token_overlap": 15.8629, "num_token_query": 42.4, "num_token_union": 68.5677, "num_word_context": 202.4415, "num_word_doc": 49.8754, "num_word_query": 31.9846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9038.7523, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2795, "query_norm": 1.3952, "queue_k_norm": 1.4337, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4, "sent_len_1": 66.8468, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7175, "stdk": 0.0479, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.218, "doc_norm": 1.4275, "encoder_q-embeddings": 1647.928, "encoder_q-layer.0": 1070.9526, "encoder_q-layer.1": 1237.4827, "encoder_q-layer.10": 1242.8334, "encoder_q-layer.11": 2837.2964, "encoder_q-layer.2": 1471.6649, "encoder_q-layer.3": 1663.8987, "encoder_q-layer.4": 1798.4305, "encoder_q-layer.5": 1962.1954, "encoder_q-layer.6": 2024.9423, "encoder_q-layer.7": 2096.5334, "encoder_q-layer.8": 1865.4746, "encoder_q-layer.9": 1284.1885, "epoch": 0.46, "inbatch_neg_score": 0.2835, "inbatch_pos_score": 0.9111, "learning_rate": 2.95e-05, "loss": 3.218, "norm_diff": 0.0488, "norm_loss": 0.0, "num_token_doc": 66.83, "num_token_overlap": 15.7824, "num_token_query": 42.3727, "num_token_union": 68.5428, "num_word_context": 202.157, "num_word_doc": 49.8145, "num_word_query": 32.018, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2643.5176, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2825, "query_norm": 1.3787, "queue_k_norm": 1.4351, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3727, "sent_len_1": 66.83, "sent_len_max_0": 128.0, "sent_len_max_1": 192.6488, "stdk": 0.0478, "stdq": 0.0445, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.207, "doc_norm": 1.4353, "encoder_q-embeddings": 2730.9109, "encoder_q-layer.0": 1955.468, "encoder_q-layer.1": 2268.6143, "encoder_q-layer.10": 1175.3289, "encoder_q-layer.11": 2635.4277, "encoder_q-layer.2": 3094.0144, "encoder_q-layer.3": 3139.2629, "encoder_q-layer.4": 3266.6245, "encoder_q-layer.5": 3585.5835, "encoder_q-layer.6": 2892.3928, "encoder_q-layer.7": 2508.5798, "encoder_q-layer.8": 2046.6719, "encoder_q-layer.9": 1399.7141, "epoch": 0.46, "inbatch_neg_score": 0.2841, "inbatch_pos_score": 0.9531, "learning_rate": 2.9444444444444448e-05, "loss": 3.207, "norm_diff": 0.055, "norm_loss": 0.0, "num_token_doc": 66.9918, "num_token_overlap": 15.8677, "num_token_query": 42.421, "num_token_union": 68.61, "num_word_context": 202.4739, "num_word_doc": 49.9684, "num_word_query": 32.0343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3868.0782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.283, "query_norm": 1.3803, "queue_k_norm": 1.4347, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.421, "sent_len_1": 66.9918, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.385, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.2004, "doc_norm": 1.4399, "encoder_q-embeddings": 1034.5282, "encoder_q-layer.0": 674.8609, "encoder_q-layer.1": 712.0003, "encoder_q-layer.10": 1132.5339, "encoder_q-layer.11": 2724.8123, "encoder_q-layer.2": 799.5944, "encoder_q-layer.3": 845.7656, "encoder_q-layer.4": 867.23, "encoder_q-layer.5": 919.4294, "encoder_q-layer.6": 1015.9963, "encoder_q-layer.7": 1095.7244, "encoder_q-layer.8": 1284.7642, "encoder_q-layer.9": 1177.2535, "epoch": 0.46, "inbatch_neg_score": 0.2852, "inbatch_pos_score": 0.9517, "learning_rate": 2.9388888888888887e-05, "loss": 3.2004, "norm_diff": 0.0525, "norm_loss": 0.0, "num_token_doc": 66.6444, "num_token_overlap": 15.835, "num_token_query": 42.3992, "num_token_union": 68.4995, "num_word_context": 202.2125, "num_word_doc": 49.7087, "num_word_query": 32.023, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1832.3071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2844, "query_norm": 1.3874, "queue_k_norm": 1.4348, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3992, "sent_len_1": 66.6444, "sent_len_max_0": 127.9775, "sent_len_max_1": 188.3413, "stdk": 0.0483, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.2063, "doc_norm": 1.4381, "encoder_q-embeddings": 1077.4441, "encoder_q-layer.0": 682.6049, "encoder_q-layer.1": 723.4036, "encoder_q-layer.10": 1177.7587, "encoder_q-layer.11": 2669.7007, "encoder_q-layer.2": 807.8083, "encoder_q-layer.3": 809.7872, "encoder_q-layer.4": 844.6107, "encoder_q-layer.5": 860.6154, "encoder_q-layer.6": 945.0882, "encoder_q-layer.7": 1043.5629, "encoder_q-layer.8": 1227.8192, "encoder_q-layer.9": 1163.4736, "epoch": 0.46, "inbatch_neg_score": 0.2882, "inbatch_pos_score": 0.9624, "learning_rate": 2.9333333333333336e-05, "loss": 3.2063, "norm_diff": 0.0524, "norm_loss": 0.0, "num_token_doc": 66.9039, "num_token_overlap": 15.876, "num_token_query": 42.3176, "num_token_union": 68.5642, "num_word_context": 202.573, "num_word_doc": 49.926, "num_word_query": 31.9527, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1812.4417, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2883, "query_norm": 1.3856, "queue_k_norm": 1.4335, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3176, "sent_len_1": 66.9039, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.3212, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2259, "doc_norm": 1.4324, "encoder_q-embeddings": 1129.1263, "encoder_q-layer.0": 748.6874, "encoder_q-layer.1": 817.2181, "encoder_q-layer.10": 1272.952, "encoder_q-layer.11": 2793.6238, "encoder_q-layer.2": 923.4192, "encoder_q-layer.3": 943.5504, "encoder_q-layer.4": 994.2913, "encoder_q-layer.5": 1050.1335, "encoder_q-layer.6": 1135.2202, "encoder_q-layer.7": 1170.6345, "encoder_q-layer.8": 1274.2107, "encoder_q-layer.9": 1158.5205, "epoch": 0.46, "inbatch_neg_score": 0.295, "inbatch_pos_score": 0.9575, "learning_rate": 2.927777777777778e-05, "loss": 3.2259, "norm_diff": 0.0364, "norm_loss": 0.0, "num_token_doc": 66.696, "num_token_overlap": 15.7694, "num_token_query": 42.1453, "num_token_union": 68.3616, "num_word_context": 202.1539, "num_word_doc": 49.8012, "num_word_query": 31.8368, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1917.5184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.3984, "queue_k_norm": 1.4333, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1453, "sent_len_1": 66.696, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.7275, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.2054, "doc_norm": 1.4367, "encoder_q-embeddings": 1704.0795, "encoder_q-layer.0": 1156.5693, "encoder_q-layer.1": 1184.1345, "encoder_q-layer.10": 1151.6034, "encoder_q-layer.11": 2693.4597, "encoder_q-layer.2": 1292.0801, "encoder_q-layer.3": 1267.2994, "encoder_q-layer.4": 1250.2556, "encoder_q-layer.5": 1212.842, "encoder_q-layer.6": 1261.675, "encoder_q-layer.7": 1193.9597, "encoder_q-layer.8": 1281.7051, "encoder_q-layer.9": 1153.9706, "epoch": 0.46, "inbatch_neg_score": 0.2964, "inbatch_pos_score": 0.9673, "learning_rate": 2.9222222222222224e-05, "loss": 3.2054, "norm_diff": 0.0377, "norm_loss": 0.0, "num_token_doc": 66.8867, "num_token_overlap": 15.86, "num_token_query": 42.3368, "num_token_union": 68.5445, "num_word_context": 202.7911, "num_word_doc": 49.9737, "num_word_query": 32.0161, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2186.6203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2954, "query_norm": 1.3989, "queue_k_norm": 1.4362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3368, "sent_len_1": 66.8867, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0863, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1988, "doc_norm": 1.4379, "encoder_q-embeddings": 1663.8835, "encoder_q-layer.0": 1193.9443, "encoder_q-layer.1": 1461.8937, "encoder_q-layer.10": 1284.4457, "encoder_q-layer.11": 2710.0457, "encoder_q-layer.2": 1686.174, "encoder_q-layer.3": 1864.2335, "encoder_q-layer.4": 1885.6508, "encoder_q-layer.5": 1697.5469, "encoder_q-layer.6": 1676.7424, "encoder_q-layer.7": 1816.2443, "encoder_q-layer.8": 1764.3251, "encoder_q-layer.9": 1328.8392, "epoch": 0.46, "inbatch_neg_score": 0.2916, "inbatch_pos_score": 0.9707, "learning_rate": 2.916666666666667e-05, "loss": 3.1988, "norm_diff": 0.0418, "norm_loss": 0.0, "num_token_doc": 66.8577, "num_token_overlap": 15.9293, "num_token_query": 42.5292, "num_token_union": 68.573, "num_word_context": 202.5938, "num_word_doc": 49.9292, "num_word_query": 32.1336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2574.8876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2942, "query_norm": 1.3961, "queue_k_norm": 1.438, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5292, "sent_len_1": 66.8577, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9988, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.2073, "doc_norm": 1.434, "encoder_q-embeddings": 1296.7085, "encoder_q-layer.0": 861.3936, "encoder_q-layer.1": 941.6212, "encoder_q-layer.10": 1275.4462, "encoder_q-layer.11": 2807.0647, "encoder_q-layer.2": 1077.4174, "encoder_q-layer.3": 1116.476, "encoder_q-layer.4": 1166.8567, "encoder_q-layer.5": 1186.8695, "encoder_q-layer.6": 1290.959, "encoder_q-layer.7": 1340.1101, "encoder_q-layer.8": 1463.5336, "encoder_q-layer.9": 1233.8971, "epoch": 0.46, "inbatch_neg_score": 0.2944, "inbatch_pos_score": 0.9595, "learning_rate": 2.9111111111111112e-05, "loss": 3.2073, "norm_diff": 0.0256, "norm_loss": 0.0, "num_token_doc": 66.6063, "num_token_overlap": 15.7847, "num_token_query": 42.2925, "num_token_union": 68.4273, "num_word_context": 202.3189, "num_word_doc": 49.7047, "num_word_query": 31.9444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2046.3357, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2944, "query_norm": 1.4104, "queue_k_norm": 1.4365, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2925, "sent_len_1": 66.6063, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.7388, "stdk": 0.048, "stdq": 0.0458, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.2024, "doc_norm": 1.4428, "encoder_q-embeddings": 3441.3274, "encoder_q-layer.0": 2368.291, "encoder_q-layer.1": 2608.801, "encoder_q-layer.10": 2294.4917, "encoder_q-layer.11": 5440.2998, "encoder_q-layer.2": 3185.9548, "encoder_q-layer.3": 3268.416, "encoder_q-layer.4": 3544.9351, "encoder_q-layer.5": 3850.3411, "encoder_q-layer.6": 3608.7085, "encoder_q-layer.7": 3436.9207, "encoder_q-layer.8": 3241.8362, "encoder_q-layer.9": 2265.6318, "epoch": 0.47, "inbatch_neg_score": 0.2931, "inbatch_pos_score": 0.9736, "learning_rate": 2.9055555555555558e-05, "loss": 3.2024, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.6105, "num_token_overlap": 15.8534, "num_token_query": 42.3018, "num_token_union": 68.3302, "num_word_context": 201.9346, "num_word_doc": 49.7052, "num_word_query": 31.9307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5037.4881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2927, "query_norm": 1.3882, "queue_k_norm": 1.4393, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3018, "sent_len_1": 66.6105, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1887, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 52.6367, "active_queue_size": 16384.0, "cl_loss": 3.2133, "doc_norm": 1.436, "encoder_q-embeddings": 4601.5151, "encoder_q-layer.0": 3273.6138, "encoder_q-layer.1": 3634.4185, "encoder_q-layer.10": 2571.5503, "encoder_q-layer.11": 5504.0845, "encoder_q-layer.2": 3883.7898, "encoder_q-layer.3": 3822.5869, "encoder_q-layer.4": 3929.6106, "encoder_q-layer.5": 3987.0757, "encoder_q-layer.6": 3831.4844, "encoder_q-layer.7": 3308.542, "encoder_q-layer.8": 3177.0283, "encoder_q-layer.9": 2490.1626, "epoch": 0.47, "inbatch_neg_score": 0.2855, "inbatch_pos_score": 0.938, "learning_rate": 2.9e-05, "loss": 3.2133, "norm_diff": 0.0526, "norm_loss": 0.0, "num_token_doc": 66.7381, "num_token_overlap": 15.8572, "num_token_query": 42.5226, "num_token_union": 68.5301, "num_word_context": 202.3279, "num_word_doc": 49.7837, "num_word_query": 32.1156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5603.3769, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2854, "query_norm": 1.3833, "queue_k_norm": 1.4382, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5226, "sent_len_1": 66.7381, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8375, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.2098, "doc_norm": 1.4377, "encoder_q-embeddings": 2557.3096, "encoder_q-layer.0": 1714.2635, "encoder_q-layer.1": 1885.8214, "encoder_q-layer.10": 2223.2314, "encoder_q-layer.11": 5129.6821, "encoder_q-layer.2": 2149.1653, "encoder_q-layer.3": 2270.0762, "encoder_q-layer.4": 2438.511, "encoder_q-layer.5": 2550.0642, "encoder_q-layer.6": 2782.8445, "encoder_q-layer.7": 2828.5723, "encoder_q-layer.8": 2792.002, "encoder_q-layer.9": 2188.0469, "epoch": 0.47, "inbatch_neg_score": 0.2916, "inbatch_pos_score": 0.9517, "learning_rate": 2.8944444444444446e-05, "loss": 3.2098, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 66.9266, "num_token_overlap": 15.8183, "num_token_query": 42.3435, "num_token_union": 68.6299, "num_word_context": 202.5035, "num_word_doc": 49.9637, "num_word_query": 31.9794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4040.2363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.3511, "queue_k_norm": 1.4404, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3435, "sent_len_1": 66.9266, "sent_len_max_0": 127.985, "sent_len_max_1": 188.505, "stdk": 0.0481, "stdq": 0.0437, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.208, "doc_norm": 1.4363, "encoder_q-embeddings": 2292.9741, "encoder_q-layer.0": 1487.275, "encoder_q-layer.1": 1605.9658, "encoder_q-layer.10": 2591.2146, "encoder_q-layer.11": 5334.2554, "encoder_q-layer.2": 1840.3557, "encoder_q-layer.3": 1836.0496, "encoder_q-layer.4": 1863.4045, "encoder_q-layer.5": 1900.3666, "encoder_q-layer.6": 2107.0212, "encoder_q-layer.7": 2239.6536, "encoder_q-layer.8": 2534.125, "encoder_q-layer.9": 2313.6125, "epoch": 0.47, "inbatch_neg_score": 0.286, "inbatch_pos_score": 0.9653, "learning_rate": 2.8888888888888888e-05, "loss": 3.208, "norm_diff": 0.0481, "norm_loss": 0.0, "num_token_doc": 66.7861, "num_token_overlap": 15.9233, "num_token_query": 42.6376, "num_token_union": 68.5572, "num_word_context": 202.1987, "num_word_doc": 49.8436, "num_word_query": 32.2201, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3716.9852, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2876, "query_norm": 1.3882, "queue_k_norm": 1.4379, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.6376, "sent_len_1": 66.7861, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.305, "stdk": 0.0481, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 51.8555, "active_queue_size": 16384.0, "cl_loss": 3.2097, "doc_norm": 1.4412, "encoder_q-embeddings": 2173.7903, "encoder_q-layer.0": 1371.4452, "encoder_q-layer.1": 1467.4991, "encoder_q-layer.10": 2492.1602, "encoder_q-layer.11": 5542.7412, "encoder_q-layer.2": 1618.2935, "encoder_q-layer.3": 1665.9257, "encoder_q-layer.4": 1761.1531, "encoder_q-layer.5": 1867.6895, "encoder_q-layer.6": 2040.8539, "encoder_q-layer.7": 2209.1484, "encoder_q-layer.8": 2572.3855, "encoder_q-layer.9": 2319.9651, "epoch": 0.47, "inbatch_neg_score": 0.2795, "inbatch_pos_score": 0.9385, "learning_rate": 2.8833333333333334e-05, "loss": 3.2097, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 66.5736, "num_token_overlap": 15.7638, "num_token_query": 42.2478, "num_token_union": 68.3763, "num_word_context": 202.1005, "num_word_doc": 49.6426, "num_word_query": 31.9219, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3647.8444, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2795, "query_norm": 1.3663, "queue_k_norm": 1.4388, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2478, "sent_len_1": 66.5736, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.7887, "stdk": 0.0482, "stdq": 0.0446, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.2195, "doc_norm": 1.4408, "encoder_q-embeddings": 2167.7136, "encoder_q-layer.0": 1487.0023, "encoder_q-layer.1": 1528.677, "encoder_q-layer.10": 2326.8101, "encoder_q-layer.11": 5402.6016, "encoder_q-layer.2": 1686.699, "encoder_q-layer.3": 1718.5614, "encoder_q-layer.4": 1807.3522, "encoder_q-layer.5": 1885.3289, "encoder_q-layer.6": 2133.0496, "encoder_q-layer.7": 2211.5408, "encoder_q-layer.8": 2459.6746, "encoder_q-layer.9": 2237.7837, "epoch": 0.47, "inbatch_neg_score": 0.2838, "inbatch_pos_score": 0.9526, "learning_rate": 2.877777777777778e-05, "loss": 3.2195, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.5949, "num_token_overlap": 15.7928, "num_token_query": 42.36, "num_token_union": 68.3999, "num_word_context": 202.4268, "num_word_doc": 49.7219, "num_word_query": 32.0107, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3631.9403, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.283, "query_norm": 1.3823, "queue_k_norm": 1.4386, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.36, "sent_len_1": 66.5949, "sent_len_max_0": 127.9725, "sent_len_max_1": 188.1225, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.2059, "doc_norm": 1.435, "encoder_q-embeddings": 2095.7925, "encoder_q-layer.0": 1427.5638, "encoder_q-layer.1": 1493.1532, "encoder_q-layer.10": 2298.4236, "encoder_q-layer.11": 5171.2979, "encoder_q-layer.2": 1802.207, "encoder_q-layer.3": 1835.713, "encoder_q-layer.4": 1941.6975, "encoder_q-layer.5": 1925.1289, "encoder_q-layer.6": 2044.6285, "encoder_q-layer.7": 2202.9761, "encoder_q-layer.8": 2676.1746, "encoder_q-layer.9": 2258.4277, "epoch": 0.47, "inbatch_neg_score": 0.2776, "inbatch_pos_score": 0.9438, "learning_rate": 2.8722222222222222e-05, "loss": 3.2059, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.6532, "num_token_overlap": 15.8205, "num_token_query": 42.328, "num_token_union": 68.4383, "num_word_context": 202.1601, "num_word_doc": 49.6912, "num_word_query": 31.9576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3581.3396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2749, "query_norm": 1.373, "queue_k_norm": 1.4386, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.328, "sent_len_1": 66.6532, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2962, "stdk": 0.048, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.217, "doc_norm": 1.4382, "encoder_q-embeddings": 3657.5273, "encoder_q-layer.0": 2579.9905, "encoder_q-layer.1": 3062.5122, "encoder_q-layer.10": 2255.2493, "encoder_q-layer.11": 5191.5327, "encoder_q-layer.2": 3161.0215, "encoder_q-layer.3": 3269.8154, "encoder_q-layer.4": 3476.0818, "encoder_q-layer.5": 3466.4028, "encoder_q-layer.6": 3988.0093, "encoder_q-layer.7": 3358.0894, "encoder_q-layer.8": 3009.8008, "encoder_q-layer.9": 2352.0813, "epoch": 0.47, "inbatch_neg_score": 0.2751, "inbatch_pos_score": 0.9517, "learning_rate": 2.8666666666666668e-05, "loss": 3.217, "norm_diff": 0.0666, "norm_loss": 0.0, "num_token_doc": 66.7285, "num_token_overlap": 15.7438, "num_token_query": 42.1431, "num_token_union": 68.4055, "num_word_context": 202.5044, "num_word_doc": 49.7973, "num_word_query": 31.8272, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5029.7263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2764, "query_norm": 1.3716, "queue_k_norm": 1.4392, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1431, "sent_len_1": 66.7285, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1637, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.193, "doc_norm": 1.4356, "encoder_q-embeddings": 2464.5181, "encoder_q-layer.0": 1659.2183, "encoder_q-layer.1": 1779.5403, "encoder_q-layer.10": 2434.3691, "encoder_q-layer.11": 5155.9951, "encoder_q-layer.2": 1955.0481, "encoder_q-layer.3": 2074.9382, "encoder_q-layer.4": 2077.4866, "encoder_q-layer.5": 2139.571, "encoder_q-layer.6": 2427.8198, "encoder_q-layer.7": 2454.4609, "encoder_q-layer.8": 2685.6094, "encoder_q-layer.9": 2248.8926, "epoch": 0.47, "inbatch_neg_score": 0.2804, "inbatch_pos_score": 0.9424, "learning_rate": 2.861111111111111e-05, "loss": 3.193, "norm_diff": 0.0504, "norm_loss": 0.0, "num_token_doc": 66.7411, "num_token_overlap": 15.818, "num_token_query": 42.4441, "num_token_union": 68.5496, "num_word_context": 202.314, "num_word_doc": 49.7846, "num_word_query": 32.0964, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3865.9273, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2788, "query_norm": 1.3852, "queue_k_norm": 1.438, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4441, "sent_len_1": 66.7411, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1238, "stdk": 0.048, "stdq": 0.0453, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.1946, "doc_norm": 1.4493, "encoder_q-embeddings": 2185.5645, "encoder_q-layer.0": 1373.4442, "encoder_q-layer.1": 1432.0785, "encoder_q-layer.10": 2718.9014, "encoder_q-layer.11": 5762.5151, "encoder_q-layer.2": 1607.9471, "encoder_q-layer.3": 1676.6758, "encoder_q-layer.4": 1855.1954, "encoder_q-layer.5": 1878.2493, "encoder_q-layer.6": 2044.7662, "encoder_q-layer.7": 2362.6311, "encoder_q-layer.8": 2701.8052, "encoder_q-layer.9": 2383.0498, "epoch": 0.47, "inbatch_neg_score": 0.2867, "inbatch_pos_score": 0.9424, "learning_rate": 2.855555555555556e-05, "loss": 3.1946, "norm_diff": 0.0795, "norm_loss": 0.0, "num_token_doc": 66.9033, "num_token_overlap": 15.847, "num_token_query": 42.3627, "num_token_union": 68.575, "num_word_context": 202.2584, "num_word_doc": 49.904, "num_word_query": 32.0098, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3905.2383, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2861, "query_norm": 1.3698, "queue_k_norm": 1.4374, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3627, "sent_len_1": 66.9033, "sent_len_max_0": 127.995, "sent_len_max_1": 190.7225, "stdk": 0.0485, "stdq": 0.0444, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 3.2267, "doc_norm": 1.4386, "encoder_q-embeddings": 2991.1821, "encoder_q-layer.0": 1974.0186, "encoder_q-layer.1": 2312.6772, "encoder_q-layer.10": 2490.4446, "encoder_q-layer.11": 5296.4077, "encoder_q-layer.2": 2732.5381, "encoder_q-layer.3": 2901.2939, "encoder_q-layer.4": 3034.3882, "encoder_q-layer.5": 2911.314, "encoder_q-layer.6": 2929.4094, "encoder_q-layer.7": 3115.7986, "encoder_q-layer.8": 2823.1006, "encoder_q-layer.9": 2262.6772, "epoch": 0.48, "inbatch_neg_score": 0.2758, "inbatch_pos_score": 0.9624, "learning_rate": 2.8499999999999998e-05, "loss": 3.2267, "norm_diff": 0.0535, "norm_loss": 0.0, "num_token_doc": 66.6969, "num_token_overlap": 15.7832, "num_token_query": 42.2356, "num_token_union": 68.3949, "num_word_context": 201.6436, "num_word_doc": 49.7861, "num_word_query": 31.9146, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4478.5205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2734, "query_norm": 1.385, "queue_k_norm": 1.4387, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2356, "sent_len_1": 66.6969, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2425, "stdk": 0.0481, "stdq": 0.0452, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.2047, "doc_norm": 1.4388, "encoder_q-embeddings": 2453.031, "encoder_q-layer.0": 1581.3114, "encoder_q-layer.1": 1644.5813, "encoder_q-layer.10": 2256.8101, "encoder_q-layer.11": 5243.7339, "encoder_q-layer.2": 1757.6664, "encoder_q-layer.3": 1853.7058, "encoder_q-layer.4": 1971.9818, "encoder_q-layer.5": 2048.7217, "encoder_q-layer.6": 2161.1206, "encoder_q-layer.7": 2235.3572, "encoder_q-layer.8": 2528.8567, "encoder_q-layer.9": 2288.2998, "epoch": 0.48, "inbatch_neg_score": 0.2638, "inbatch_pos_score": 0.9365, "learning_rate": 2.8444444444444447e-05, "loss": 3.2047, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.8413, "num_token_overlap": 15.8304, "num_token_query": 42.3838, "num_token_union": 68.5241, "num_word_context": 202.3235, "num_word_doc": 49.8537, "num_word_query": 32.0036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3711.9653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2639, "query_norm": 1.3746, "queue_k_norm": 1.4389, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3838, "sent_len_1": 66.8413, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4675, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.2089, "doc_norm": 1.4367, "encoder_q-embeddings": 2178.0579, "encoder_q-layer.0": 1399.0219, "encoder_q-layer.1": 1499.905, "encoder_q-layer.10": 2305.3794, "encoder_q-layer.11": 5176.9702, "encoder_q-layer.2": 1712.3727, "encoder_q-layer.3": 1847.4287, "encoder_q-layer.4": 1988.6731, "encoder_q-layer.5": 2073.4514, "encoder_q-layer.6": 2221.6206, "encoder_q-layer.7": 2275.948, "encoder_q-layer.8": 2438.4377, "encoder_q-layer.9": 2203.6497, "epoch": 0.48, "inbatch_neg_score": 0.2588, "inbatch_pos_score": 0.9116, "learning_rate": 2.8388888888888893e-05, "loss": 3.2089, "norm_diff": 0.0704, "norm_loss": 0.0, "num_token_doc": 66.6893, "num_token_overlap": 15.7667, "num_token_query": 42.1878, "num_token_union": 68.3677, "num_word_context": 202.3404, "num_word_doc": 49.7079, "num_word_query": 31.8714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3592.2369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2588, "query_norm": 1.3663, "queue_k_norm": 1.4387, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1878, "sent_len_1": 66.6893, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.37, "stdk": 0.0481, "stdq": 0.0445, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1921, "doc_norm": 1.4391, "encoder_q-embeddings": 3035.8281, "encoder_q-layer.0": 2042.5818, "encoder_q-layer.1": 2294.6553, "encoder_q-layer.10": 2281.2241, "encoder_q-layer.11": 5258.5908, "encoder_q-layer.2": 2614.1514, "encoder_q-layer.3": 2708.0005, "encoder_q-layer.4": 2907.5696, "encoder_q-layer.5": 2933.9768, "encoder_q-layer.6": 3098.4402, "encoder_q-layer.7": 3246.927, "encoder_q-layer.8": 3144.1506, "encoder_q-layer.9": 2621.627, "epoch": 0.48, "inbatch_neg_score": 0.2673, "inbatch_pos_score": 0.9404, "learning_rate": 2.8333333333333335e-05, "loss": 3.1921, "norm_diff": 0.0405, "norm_loss": 0.0, "num_token_doc": 67.1445, "num_token_overlap": 15.924, "num_token_query": 42.5661, "num_token_union": 68.7477, "num_word_context": 202.8732, "num_word_doc": 50.0569, "num_word_query": 32.1624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4502.4253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2678, "query_norm": 1.3986, "queue_k_norm": 1.4396, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5661, "sent_len_1": 67.1445, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1025, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.2042, "doc_norm": 1.4393, "encoder_q-embeddings": 3679.2588, "encoder_q-layer.0": 2480.0735, "encoder_q-layer.1": 2852.6548, "encoder_q-layer.10": 2430.3606, "encoder_q-layer.11": 5580.3955, "encoder_q-layer.2": 3278.302, "encoder_q-layer.3": 3411.4878, "encoder_q-layer.4": 3402.2288, "encoder_q-layer.5": 3580.8357, "encoder_q-layer.6": 3154.8032, "encoder_q-layer.7": 3675.5554, "encoder_q-layer.8": 3163.9763, "encoder_q-layer.9": 2618.6287, "epoch": 0.48, "inbatch_neg_score": 0.2711, "inbatch_pos_score": 0.9414, "learning_rate": 2.827777777777778e-05, "loss": 3.2042, "norm_diff": 0.048, "norm_loss": 0.0, "num_token_doc": 66.7213, "num_token_overlap": 15.7829, "num_token_query": 42.2043, "num_token_union": 68.4135, "num_word_context": 202.3027, "num_word_doc": 49.7848, "num_word_query": 31.8523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5103.621, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2715, "query_norm": 1.3913, "queue_k_norm": 1.4372, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2043, "sent_len_1": 66.7213, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.1287, "stdk": 0.0482, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 52.1484, "active_queue_size": 16384.0, "cl_loss": 3.1892, "doc_norm": 1.4381, "encoder_q-embeddings": 1996.1741, "encoder_q-layer.0": 1339.9786, "encoder_q-layer.1": 1371.5022, "encoder_q-layer.10": 2437.8484, "encoder_q-layer.11": 5636.8945, "encoder_q-layer.2": 1551.6685, "encoder_q-layer.3": 1586.6486, "encoder_q-layer.4": 1701.7739, "encoder_q-layer.5": 1694.9728, "encoder_q-layer.6": 1895.7318, "encoder_q-layer.7": 2216.072, "encoder_q-layer.8": 2634.0293, "encoder_q-layer.9": 2324.7322, "epoch": 0.48, "inbatch_neg_score": 0.2747, "inbatch_pos_score": 0.9277, "learning_rate": 2.8222222222222223e-05, "loss": 3.1892, "norm_diff": 0.0518, "norm_loss": 0.0, "num_token_doc": 66.9438, "num_token_overlap": 15.8292, "num_token_query": 42.2783, "num_token_union": 68.4762, "num_word_context": 202.2324, "num_word_doc": 49.9248, "num_word_query": 31.9621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3655.4619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2729, "query_norm": 1.3864, "queue_k_norm": 1.4408, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2783, "sent_len_1": 66.9438, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.9025, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.1914, "doc_norm": 1.4347, "encoder_q-embeddings": 3762.4136, "encoder_q-layer.0": 2528.8782, "encoder_q-layer.1": 2752.7263, "encoder_q-layer.10": 2299.78, "encoder_q-layer.11": 5269.7632, "encoder_q-layer.2": 3330.6082, "encoder_q-layer.3": 3550.4485, "encoder_q-layer.4": 4115.4429, "encoder_q-layer.5": 4032.4834, "encoder_q-layer.6": 3618.635, "encoder_q-layer.7": 3842.3408, "encoder_q-layer.8": 3436.5017, "encoder_q-layer.9": 2406.7981, "epoch": 0.48, "inbatch_neg_score": 0.284, "inbatch_pos_score": 0.9678, "learning_rate": 2.816666666666667e-05, "loss": 3.1914, "norm_diff": 0.0373, "norm_loss": 0.0, "num_token_doc": 66.9219, "num_token_overlap": 15.9237, "num_token_query": 42.4081, "num_token_union": 68.5612, "num_word_context": 202.6773, "num_word_doc": 49.9808, "num_word_query": 32.0489, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5301.4666, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2837, "query_norm": 1.3974, "queue_k_norm": 1.4369, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4081, "sent_len_1": 66.9219, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.4963, "stdk": 0.0481, "stdq": 0.045, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1943, "doc_norm": 1.4447, "encoder_q-embeddings": 2704.8833, "encoder_q-layer.0": 1852.9587, "encoder_q-layer.1": 1958.304, "encoder_q-layer.10": 2418.979, "encoder_q-layer.11": 4992.1543, "encoder_q-layer.2": 2241.5952, "encoder_q-layer.3": 2446.0786, "encoder_q-layer.4": 2441.397, "encoder_q-layer.5": 2598.543, "encoder_q-layer.6": 2516.9104, "encoder_q-layer.7": 2565.9548, "encoder_q-layer.8": 2692.2634, "encoder_q-layer.9": 2435.2402, "epoch": 0.48, "inbatch_neg_score": 0.2794, "inbatch_pos_score": 0.9595, "learning_rate": 2.811111111111111e-05, "loss": 3.1943, "norm_diff": 0.0378, "norm_loss": 0.0, "num_token_doc": 66.8178, "num_token_overlap": 15.833, "num_token_query": 42.3996, "num_token_union": 68.5875, "num_word_context": 202.2273, "num_word_doc": 49.8425, "num_word_query": 32.0315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4009.9538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2793, "query_norm": 1.4069, "queue_k_norm": 1.4375, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3996, "sent_len_1": 66.8178, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.315, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.186, "doc_norm": 1.4409, "encoder_q-embeddings": 2065.3928, "encoder_q-layer.0": 1395.8959, "encoder_q-layer.1": 1487.845, "encoder_q-layer.10": 2683.8035, "encoder_q-layer.11": 5351.2144, "encoder_q-layer.2": 1661.6254, "encoder_q-layer.3": 1688.7136, "encoder_q-layer.4": 1799.9027, "encoder_q-layer.5": 1867.1653, "encoder_q-layer.6": 2029.0455, "encoder_q-layer.7": 2327.53, "encoder_q-layer.8": 2665.2371, "encoder_q-layer.9": 2323.9041, "epoch": 0.48, "inbatch_neg_score": 0.285, "inbatch_pos_score": 0.9668, "learning_rate": 2.8055555555555557e-05, "loss": 3.186, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 15.815, "num_token_query": 42.3788, "num_token_union": 68.6019, "num_word_context": 202.608, "num_word_doc": 49.9082, "num_word_query": 32.0104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3646.77, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2847, "query_norm": 1.4163, "queue_k_norm": 1.4382, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3788, "sent_len_1": 66.883, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7825, "stdk": 0.0483, "stdq": 0.0456, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.173, "doc_norm": 1.4409, "encoder_q-embeddings": 2572.0203, "encoder_q-layer.0": 1666.5792, "encoder_q-layer.1": 1846.3544, "encoder_q-layer.10": 2344.2781, "encoder_q-layer.11": 5502.8247, "encoder_q-layer.2": 2153.6104, "encoder_q-layer.3": 2427.7568, "encoder_q-layer.4": 2618.3813, "encoder_q-layer.5": 2620.4629, "encoder_q-layer.6": 2791.2676, "encoder_q-layer.7": 2836.2048, "encoder_q-layer.8": 2852.8706, "encoder_q-layer.9": 2450.4705, "epoch": 0.48, "inbatch_neg_score": 0.2908, "inbatch_pos_score": 0.9575, "learning_rate": 2.8000000000000003e-05, "loss": 3.173, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 66.9736, "num_token_overlap": 15.8605, "num_token_query": 42.379, "num_token_union": 68.6272, "num_word_context": 202.4334, "num_word_doc": 49.9541, "num_word_query": 31.9982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4179.026, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.29, "query_norm": 1.4048, "queue_k_norm": 1.4377, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.379, "sent_len_1": 66.9736, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.775, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.2118, "doc_norm": 1.4416, "encoder_q-embeddings": 4552.6416, "encoder_q-layer.0": 2938.4893, "encoder_q-layer.1": 3238.5691, "encoder_q-layer.10": 5088.7764, "encoder_q-layer.11": 11130.4043, "encoder_q-layer.2": 3699.2769, "encoder_q-layer.3": 3774.7446, "encoder_q-layer.4": 3907.8818, "encoder_q-layer.5": 3860.886, "encoder_q-layer.6": 4252.9653, "encoder_q-layer.7": 4748.0693, "encoder_q-layer.8": 5284.4854, "encoder_q-layer.9": 5051.0293, "epoch": 0.49, "inbatch_neg_score": 0.2912, "inbatch_pos_score": 0.9731, "learning_rate": 2.7944444444444445e-05, "loss": 3.2118, "norm_diff": 0.0259, "norm_loss": 0.0, "num_token_doc": 66.9194, "num_token_overlap": 15.8, "num_token_query": 42.1264, "num_token_union": 68.5121, "num_word_context": 202.1182, "num_word_doc": 49.9605, "num_word_query": 31.8245, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7581.6396, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2903, "query_norm": 1.417, "queue_k_norm": 1.4389, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1264, "sent_len_1": 66.9194, "sent_len_max_0": 127.995, "sent_len_max_1": 188.8988, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1988, "doc_norm": 1.4412, "encoder_q-embeddings": 5401.1201, "encoder_q-layer.0": 3455.2917, "encoder_q-layer.1": 3842.0251, "encoder_q-layer.10": 4777.5957, "encoder_q-layer.11": 10721.1699, "encoder_q-layer.2": 4465.27, "encoder_q-layer.3": 4609.2065, "encoder_q-layer.4": 4969.3267, "encoder_q-layer.5": 4925.5659, "encoder_q-layer.6": 5397.6016, "encoder_q-layer.7": 5290.1904, "encoder_q-layer.8": 5796.7461, "encoder_q-layer.9": 4750.144, "epoch": 0.49, "inbatch_neg_score": 0.2933, "inbatch_pos_score": 0.9688, "learning_rate": 2.788888888888889e-05, "loss": 3.1988, "norm_diff": 0.0503, "norm_loss": 0.0, "num_token_doc": 66.9331, "num_token_overlap": 15.7894, "num_token_query": 42.2327, "num_token_union": 68.5292, "num_word_context": 202.4396, "num_word_doc": 49.89, "num_word_query": 31.8982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8407.1502, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2922, "query_norm": 1.3909, "queue_k_norm": 1.4406, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2327, "sent_len_1": 66.9331, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6738, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.2217, "doc_norm": 1.4352, "encoder_q-embeddings": 4632.5459, "encoder_q-layer.0": 2921.4548, "encoder_q-layer.1": 3220.3586, "encoder_q-layer.10": 5098.356, "encoder_q-layer.11": 11235.3379, "encoder_q-layer.2": 3732.22, "encoder_q-layer.3": 3798.6936, "encoder_q-layer.4": 4132.9355, "encoder_q-layer.5": 4199.7939, "encoder_q-layer.6": 4211.6406, "encoder_q-layer.7": 4560.6113, "encoder_q-layer.8": 5079.6157, "encoder_q-layer.9": 4825.167, "epoch": 0.49, "inbatch_neg_score": 0.2934, "inbatch_pos_score": 0.9312, "learning_rate": 2.7833333333333333e-05, "loss": 3.2217, "norm_diff": 0.0641, "norm_loss": 0.0, "num_token_doc": 66.7393, "num_token_overlap": 15.749, "num_token_query": 42.2016, "num_token_union": 68.4661, "num_word_context": 202.3592, "num_word_doc": 49.7811, "num_word_query": 31.8514, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7750.2067, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.293, "query_norm": 1.3711, "queue_k_norm": 1.4394, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2016, "sent_len_1": 66.7393, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8237, "stdk": 0.048, "stdq": 0.0442, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.206, "doc_norm": 1.4359, "encoder_q-embeddings": 5654.083, "encoder_q-layer.0": 3856.395, "encoder_q-layer.1": 4220.3892, "encoder_q-layer.10": 5282.7705, "encoder_q-layer.11": 12144.4434, "encoder_q-layer.2": 4826.543, "encoder_q-layer.3": 5445.8804, "encoder_q-layer.4": 5780.9312, "encoder_q-layer.5": 5884.3013, "encoder_q-layer.6": 5686.0493, "encoder_q-layer.7": 5402.4375, "encoder_q-layer.8": 5829.6802, "encoder_q-layer.9": 5224.5654, "epoch": 0.49, "inbatch_neg_score": 0.2906, "inbatch_pos_score": 0.9546, "learning_rate": 2.777777777777778e-05, "loss": 3.206, "norm_diff": 0.0474, "norm_loss": 0.0, "num_token_doc": 66.7353, "num_token_overlap": 15.7838, "num_token_query": 42.1483, "num_token_union": 68.4089, "num_word_context": 201.8495, "num_word_doc": 49.8226, "num_word_query": 31.8208, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8986.9441, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2915, "query_norm": 1.3885, "queue_k_norm": 1.4389, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1483, "sent_len_1": 66.7353, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.5412, "stdk": 0.0481, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 27.3054, "dev_samples_per_second": 2.344, "dev_steps_per_second": 0.037, "epoch": 0.49, "step": 50000, "test_accuracy": 93.39599609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3743959367275238, "test_doc_norm": 1.4244897365570068, "test_inbatch_neg_score": 0.6470443606376648, "test_inbatch_pos_score": 1.5771232843399048, "test_loss": 0.3743959367275238, "test_loss_align": 1.0859118700027466, "test_loss_unif": 3.8233420848846436, "test_loss_unif_q@queue": 3.8233423233032227, "test_norm_diff": 0.051280297338962555, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2844110429286957, "test_query_norm": 1.4757699966430664, "test_queue_k_norm": 1.438543438911438, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04247135668992996, "test_stdq": 0.04268572852015495, "test_stdqueue_k": 0.04826151952147484, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.3054, "dev_samples_per_second": 2.344, "dev_steps_per_second": 0.037, "epoch": 0.49, "eval_beir-arguana_ndcg@10": 0.37992, "eval_beir-arguana_recall@10": 0.64154, "eval_beir-arguana_recall@100": 0.92532, "eval_beir-arguana_recall@20": 0.76885, "eval_beir-avg_ndcg@10": 0.36474724999999997, "eval_beir-avg_recall@10": 0.43434116666666667, "eval_beir-avg_recall@100": 0.613067, "eval_beir-avg_recall@20": 0.49364566666666665, "eval_beir-cqadupstack_ndcg@10": 0.2579725, "eval_beir-cqadupstack_recall@10": 0.3505916666666667, "eval_beir-cqadupstack_recall@100": 0.5751799999999999, "eval_beir-cqadupstack_recall@20": 0.41564666666666666, "eval_beir-fiqa_ndcg@10": 0.23424, "eval_beir-fiqa_recall@10": 0.28859, "eval_beir-fiqa_recall@100": 0.55404, "eval_beir-fiqa_recall@20": 0.36203, "eval_beir-nfcorpus_ndcg@10": 0.28618, "eval_beir-nfcorpus_recall@10": 0.14079, "eval_beir-nfcorpus_recall@100": 0.2688, "eval_beir-nfcorpus_recall@20": 0.16433, "eval_beir-nq_ndcg@10": 0.2582, "eval_beir-nq_recall@10": 0.42449, "eval_beir-nq_recall@100": 0.76267, "eval_beir-nq_recall@20": 0.53498, "eval_beir-quora_ndcg@10": 0.75595, "eval_beir-quora_recall@10": 0.86918, "eval_beir-quora_recall@100": 0.97057, "eval_beir-quora_recall@20": 0.91467, "eval_beir-scidocs_ndcg@10": 0.14593, "eval_beir-scidocs_recall@10": 0.15455, "eval_beir-scidocs_recall@100": 0.3537, "eval_beir-scidocs_recall@20": 0.2115, "eval_beir-scifact_ndcg@10": 0.62962, "eval_beir-scifact_recall@10": 0.79267, "eval_beir-scifact_recall@100": 0.91656, "eval_beir-scifact_recall@20": 0.82978, "eval_beir-trec-covid_ndcg@10": 0.51269, "eval_beir-trec-covid_recall@10": 0.544, "eval_beir-trec-covid_recall@100": 0.4036, "eval_beir-trec-covid_recall@20": 0.535, "eval_beir-webis-touche2020_ndcg@10": 0.18677, "eval_beir-webis-touche2020_recall@10": 0.13701, "eval_beir-webis-touche2020_recall@100": 0.40023, "eval_beir-webis-touche2020_recall@20": 0.19967, "eval_senteval-avg_sts": 0.7605710959885856, "eval_senteval-sickr_spearman": 0.7262568149433587, "eval_senteval-stsb_spearman": 0.7948853770338125, "step": 50000, "test_accuracy": 93.39599609375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3743959367275238, "test_doc_norm": 1.4244897365570068, "test_inbatch_neg_score": 0.6470443606376648, "test_inbatch_pos_score": 1.5771232843399048, "test_loss": 0.3743959367275238, "test_loss_align": 1.0859118700027466, "test_loss_unif": 3.8233420848846436, "test_loss_unif_q@queue": 3.8233423233032227, "test_norm_diff": 0.051280297338962555, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.2844110429286957, "test_query_norm": 1.4757699966430664, "test_queue_k_norm": 1.438543438911438, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04247135668992996, "test_stdq": 0.04268572852015495, "test_stdqueue_k": 0.04826151952147484, "test_stdqueue_q": 0.0 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.2091, "doc_norm": 1.4461, "encoder_q-embeddings": 5152.6387, "encoder_q-layer.0": 3602.8308, "encoder_q-layer.1": 3874.9421, "encoder_q-layer.10": 4955.8335, "encoder_q-layer.11": 11175.2695, "encoder_q-layer.2": 4441.5063, "encoder_q-layer.3": 4633.0796, "encoder_q-layer.4": 5020.0068, "encoder_q-layer.5": 4830.2207, "encoder_q-layer.6": 4842.1626, "encoder_q-layer.7": 4860.6929, "encoder_q-layer.8": 5480.4087, "encoder_q-layer.9": 4784.7568, "epoch": 0.49, "inbatch_neg_score": 0.287, "inbatch_pos_score": 0.9766, "learning_rate": 2.772222222222222e-05, "loss": 3.2091, "norm_diff": 0.0495, "norm_loss": 0.0, "num_token_doc": 66.9825, "num_token_overlap": 15.8121, "num_token_query": 42.288, "num_token_union": 68.5656, "num_word_context": 202.6428, "num_word_doc": 50.013, "num_word_query": 31.9714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8279.6109, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2869, "query_norm": 1.3965, "queue_k_norm": 1.4401, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.288, "sent_len_1": 66.9825, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5, "stdk": 0.0484, "stdq": 0.0456, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.1996, "doc_norm": 1.444, "encoder_q-embeddings": 4817.0498, "encoder_q-layer.0": 3213.2649, "encoder_q-layer.1": 3387.7129, "encoder_q-layer.10": 4712.5513, "encoder_q-layer.11": 11057.6123, "encoder_q-layer.2": 3919.3, "encoder_q-layer.3": 3650.3962, "encoder_q-layer.4": 3728.0093, "encoder_q-layer.5": 3721.7883, "encoder_q-layer.6": 4129.6318, "encoder_q-layer.7": 4630.0234, "encoder_q-layer.8": 5272.332, "encoder_q-layer.9": 4734.3271, "epoch": 0.49, "inbatch_neg_score": 0.2924, "inbatch_pos_score": 0.9424, "learning_rate": 2.7666666666666667e-05, "loss": 3.1996, "norm_diff": 0.0701, "norm_loss": 0.0, "num_token_doc": 66.8826, "num_token_overlap": 15.8139, "num_token_query": 42.2775, "num_token_union": 68.5378, "num_word_context": 202.5148, "num_word_doc": 49.9321, "num_word_query": 31.9584, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7791.9835, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.292, "query_norm": 1.3738, "queue_k_norm": 1.4415, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2775, "sent_len_1": 66.8826, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.9075, "stdk": 0.0483, "stdq": 0.0446, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.1843, "doc_norm": 1.4439, "encoder_q-embeddings": 10440.627, "encoder_q-layer.0": 6945.4707, "encoder_q-layer.1": 7616.644, "encoder_q-layer.10": 4604.4668, "encoder_q-layer.11": 10652.5879, "encoder_q-layer.2": 8422.541, "encoder_q-layer.3": 7742.8193, "encoder_q-layer.4": 7867.2544, "encoder_q-layer.5": 7604.2773, "encoder_q-layer.6": 7236.2197, "encoder_q-layer.7": 6455.6694, "encoder_q-layer.8": 5892.2295, "encoder_q-layer.9": 4792.958, "epoch": 0.49, "inbatch_neg_score": 0.2878, "inbatch_pos_score": 0.9492, "learning_rate": 2.761111111111111e-05, "loss": 3.1843, "norm_diff": 0.0615, "norm_loss": 0.0, "num_token_doc": 66.5537, "num_token_overlap": 15.7927, "num_token_query": 42.398, "num_token_union": 68.4416, "num_word_context": 202.3146, "num_word_doc": 49.6807, "num_word_query": 32.0323, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11677.9249, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2874, "query_norm": 1.3824, "queue_k_norm": 1.4408, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.398, "sent_len_1": 66.5537, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9525, "stdk": 0.0483, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.194, "doc_norm": 1.4323, "encoder_q-embeddings": 4043.1128, "encoder_q-layer.0": 2657.1079, "encoder_q-layer.1": 2777.052, "encoder_q-layer.10": 4739.6763, "encoder_q-layer.11": 10799.5723, "encoder_q-layer.2": 3046.5632, "encoder_q-layer.3": 3136.8035, "encoder_q-layer.4": 3246.7471, "encoder_q-layer.5": 3371.6538, "encoder_q-layer.6": 3909.134, "encoder_q-layer.7": 4700.999, "encoder_q-layer.8": 5621.395, "encoder_q-layer.9": 4722.5542, "epoch": 0.49, "inbatch_neg_score": 0.2861, "inbatch_pos_score": 0.9565, "learning_rate": 2.7555555555555555e-05, "loss": 3.194, "norm_diff": 0.0667, "norm_loss": 0.0, "num_token_doc": 66.8558, "num_token_overlap": 15.8196, "num_token_query": 42.2917, "num_token_union": 68.5261, "num_word_context": 202.4635, "num_word_doc": 49.9034, "num_word_query": 31.9723, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7250.9941, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2856, "query_norm": 1.3656, "queue_k_norm": 1.4416, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2917, "sent_len_1": 66.8558, "sent_len_max_0": 127.9975, "sent_len_max_1": 187.625, "stdk": 0.0479, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.2052, "doc_norm": 1.438, "encoder_q-embeddings": 8430.3447, "encoder_q-layer.0": 6227.123, "encoder_q-layer.1": 6940.335, "encoder_q-layer.10": 5029.6538, "encoder_q-layer.11": 11535.2422, "encoder_q-layer.2": 8055.4917, "encoder_q-layer.3": 7980.3037, "encoder_q-layer.4": 7769.7397, "encoder_q-layer.5": 7912.6494, "encoder_q-layer.6": 8499.832, "encoder_q-layer.7": 8076.3325, "encoder_q-layer.8": 6938.1943, "encoder_q-layer.9": 5542.0513, "epoch": 0.49, "inbatch_neg_score": 0.2823, "inbatch_pos_score": 0.9517, "learning_rate": 2.7500000000000004e-05, "loss": 3.2052, "norm_diff": 0.0444, "norm_loss": 0.0, "num_token_doc": 66.7616, "num_token_overlap": 15.8246, "num_token_query": 42.3825, "num_token_union": 68.5384, "num_word_context": 202.2209, "num_word_doc": 49.8224, "num_word_query": 32.0264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11494.419, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2822, "query_norm": 1.3936, "queue_k_norm": 1.4402, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3825, "sent_len_1": 66.7616, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7763, "stdk": 0.0481, "stdq": 0.0454, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1831, "doc_norm": 1.4431, "encoder_q-embeddings": 5551.7998, "encoder_q-layer.0": 3897.7324, "encoder_q-layer.1": 4397.6138, "encoder_q-layer.10": 5022.8882, "encoder_q-layer.11": 10256.1592, "encoder_q-layer.2": 5144.8379, "encoder_q-layer.3": 5055.2173, "encoder_q-layer.4": 5308.4214, "encoder_q-layer.5": 5459.6558, "encoder_q-layer.6": 5635.4478, "encoder_q-layer.7": 5909.6816, "encoder_q-layer.8": 5853.3374, "encoder_q-layer.9": 4677.8047, "epoch": 0.49, "inbatch_neg_score": 0.2873, "inbatch_pos_score": 0.9526, "learning_rate": 2.7444444444444443e-05, "loss": 3.1831, "norm_diff": 0.0751, "norm_loss": 0.0, "num_token_doc": 66.7366, "num_token_overlap": 15.8413, "num_token_query": 42.2706, "num_token_union": 68.3896, "num_word_context": 202.1532, "num_word_doc": 49.8251, "num_word_query": 31.9239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8555.741, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2871, "query_norm": 1.3681, "queue_k_norm": 1.4405, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2706, "sent_len_1": 66.7366, "sent_len_max_0": 128.0, "sent_len_max_1": 188.895, "stdk": 0.0483, "stdq": 0.0443, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1712, "doc_norm": 1.4398, "encoder_q-embeddings": 7329.4004, "encoder_q-layer.0": 5353.0869, "encoder_q-layer.1": 6093.4312, "encoder_q-layer.10": 2413.4238, "encoder_q-layer.11": 5335.8784, "encoder_q-layer.2": 6776.7393, "encoder_q-layer.3": 6867.4473, "encoder_q-layer.4": 7386.2891, "encoder_q-layer.5": 8480.8926, "encoder_q-layer.6": 8041.1196, "encoder_q-layer.7": 5477.8188, "encoder_q-layer.8": 5127.853, "encoder_q-layer.9": 3686.8076, "epoch": 0.49, "inbatch_neg_score": 0.293, "inbatch_pos_score": 0.9648, "learning_rate": 2.7388888888888892e-05, "loss": 3.1712, "norm_diff": 0.0568, "norm_loss": 0.0, "num_token_doc": 66.7703, "num_token_overlap": 15.8863, "num_token_query": 42.3897, "num_token_union": 68.4112, "num_word_context": 202.5956, "num_word_doc": 49.8209, "num_word_query": 32.0248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9220.5321, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.293, "query_norm": 1.383, "queue_k_norm": 1.4409, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3897, "sent_len_1": 66.7703, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9875, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.185, "doc_norm": 1.4379, "encoder_q-embeddings": 1267.5594, "encoder_q-layer.0": 875.0698, "encoder_q-layer.1": 890.4495, "encoder_q-layer.10": 1228.2283, "encoder_q-layer.11": 2575.095, "encoder_q-layer.2": 974.6134, "encoder_q-layer.3": 1038.3286, "encoder_q-layer.4": 1067.9214, "encoder_q-layer.5": 1077.2649, "encoder_q-layer.6": 1130.0496, "encoder_q-layer.7": 1211.1837, "encoder_q-layer.8": 1358.5406, "encoder_q-layer.9": 1166.7609, "epoch": 0.5, "inbatch_neg_score": 0.2949, "inbatch_pos_score": 0.9565, "learning_rate": 2.733333333333333e-05, "loss": 3.185, "norm_diff": 0.0546, "norm_loss": 0.0, "num_token_doc": 66.9235, "num_token_overlap": 15.85, "num_token_query": 42.2834, "num_token_union": 68.5155, "num_word_context": 202.4098, "num_word_doc": 49.9276, "num_word_query": 31.9518, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1928.2155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2947, "query_norm": 1.3834, "queue_k_norm": 1.441, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2834, "sent_len_1": 66.9235, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.9975, "stdk": 0.0481, "stdq": 0.0446, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1803, "doc_norm": 1.4401, "encoder_q-embeddings": 1440.1233, "encoder_q-layer.0": 932.5136, "encoder_q-layer.1": 1048.9191, "encoder_q-layer.10": 1270.3284, "encoder_q-layer.11": 2769.6211, "encoder_q-layer.2": 1180.9591, "encoder_q-layer.3": 1171.8962, "encoder_q-layer.4": 1171.9059, "encoder_q-layer.5": 1212.2422, "encoder_q-layer.6": 1275.9906, "encoder_q-layer.7": 1312.8248, "encoder_q-layer.8": 1396.1355, "encoder_q-layer.9": 1256.6458, "epoch": 0.5, "inbatch_neg_score": 0.2939, "inbatch_pos_score": 0.9736, "learning_rate": 2.727777777777778e-05, "loss": 3.1803, "norm_diff": 0.0341, "norm_loss": 0.0, "num_token_doc": 66.6803, "num_token_overlap": 15.8581, "num_token_query": 42.5655, "num_token_union": 68.5728, "num_word_context": 202.7568, "num_word_doc": 49.78, "num_word_query": 32.1649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2128.4396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2952, "query_norm": 1.4062, "queue_k_norm": 1.4429, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5655, "sent_len_1": 66.6803, "sent_len_max_0": 128.0, "sent_len_max_1": 187.9338, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.2001, "doc_norm": 1.44, "encoder_q-embeddings": 1098.6838, "encoder_q-layer.0": 714.6933, "encoder_q-layer.1": 785.7048, "encoder_q-layer.10": 1108.6444, "encoder_q-layer.11": 2629.9468, "encoder_q-layer.2": 895.6745, "encoder_q-layer.3": 913.2265, "encoder_q-layer.4": 947.5836, "encoder_q-layer.5": 998.5837, "encoder_q-layer.6": 1129.3247, "encoder_q-layer.7": 1162.2493, "encoder_q-layer.8": 1324.8242, "encoder_q-layer.9": 1160.0421, "epoch": 0.5, "inbatch_neg_score": 0.3005, "inbatch_pos_score": 0.978, "learning_rate": 2.7222222222222223e-05, "loss": 3.2001, "norm_diff": 0.0607, "norm_loss": 0.0, "num_token_doc": 66.9885, "num_token_overlap": 15.779, "num_token_query": 42.1943, "num_token_union": 68.6204, "num_word_context": 202.6617, "num_word_doc": 50.0264, "num_word_query": 31.8908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1879.093, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2996, "query_norm": 1.3792, "queue_k_norm": 1.4411, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1943, "sent_len_1": 66.9885, "sent_len_max_0": 128.0, "sent_len_max_1": 188.41, "stdk": 0.0481, "stdq": 0.0444, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1881, "doc_norm": 1.4473, "encoder_q-embeddings": 1769.4041, "encoder_q-layer.0": 1229.1749, "encoder_q-layer.1": 1394.9596, "encoder_q-layer.10": 1257.6281, "encoder_q-layer.11": 2838.0735, "encoder_q-layer.2": 1553.4836, "encoder_q-layer.3": 1625.401, "encoder_q-layer.4": 1727.276, "encoder_q-layer.5": 1842.3229, "encoder_q-layer.6": 1952.6289, "encoder_q-layer.7": 1821.365, "encoder_q-layer.8": 1638.8419, "encoder_q-layer.9": 1267.1637, "epoch": 0.5, "inbatch_neg_score": 0.2895, "inbatch_pos_score": 0.9507, "learning_rate": 2.716666666666667e-05, "loss": 3.1881, "norm_diff": 0.0685, "norm_loss": 0.0, "num_token_doc": 66.6928, "num_token_overlap": 15.8096, "num_token_query": 42.2231, "num_token_union": 68.3846, "num_word_context": 201.9378, "num_word_doc": 49.7685, "num_word_query": 31.8723, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2547.4066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2908, "query_norm": 1.3788, "queue_k_norm": 1.4401, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2231, "sent_len_1": 66.6928, "sent_len_max_0": 128.0, "sent_len_max_1": 188.645, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.1782, "doc_norm": 1.4427, "encoder_q-embeddings": 1338.6754, "encoder_q-layer.0": 898.144, "encoder_q-layer.1": 982.8694, "encoder_q-layer.10": 1252.3744, "encoder_q-layer.11": 2706.5286, "encoder_q-layer.2": 1125.5345, "encoder_q-layer.3": 1139.2936, "encoder_q-layer.4": 1163.7557, "encoder_q-layer.5": 1196.2145, "encoder_q-layer.6": 1283.8235, "encoder_q-layer.7": 1270.3196, "encoder_q-layer.8": 1375.1456, "encoder_q-layer.9": 1197.3147, "epoch": 0.5, "inbatch_neg_score": 0.3006, "inbatch_pos_score": 0.9673, "learning_rate": 2.7111111111111114e-05, "loss": 3.1782, "norm_diff": 0.0399, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 15.8943, "num_token_query": 42.4216, "num_token_union": 68.5523, "num_word_context": 202.2714, "num_word_doc": 49.8574, "num_word_query": 31.9969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2053.0412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.404, "queue_k_norm": 1.4433, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4216, "sent_len_1": 66.883, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.8638, "stdk": 0.0482, "stdq": 0.0454, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1663, "doc_norm": 1.4424, "encoder_q-embeddings": 3232.7297, "encoder_q-layer.0": 2126.3064, "encoder_q-layer.1": 2485.0188, "encoder_q-layer.10": 1152.6705, "encoder_q-layer.11": 2717.2393, "encoder_q-layer.2": 2820.5144, "encoder_q-layer.3": 3041.1919, "encoder_q-layer.4": 2877.447, "encoder_q-layer.5": 3243.4629, "encoder_q-layer.6": 2797.8845, "encoder_q-layer.7": 2237.1089, "encoder_q-layer.8": 1850.9485, "encoder_q-layer.9": 1252.4397, "epoch": 0.5, "inbatch_neg_score": 0.3007, "inbatch_pos_score": 0.9746, "learning_rate": 2.7055555555555557e-05, "loss": 3.1663, "norm_diff": 0.0532, "norm_loss": 0.0, "num_token_doc": 66.8774, "num_token_overlap": 15.9107, "num_token_query": 42.4796, "num_token_union": 68.528, "num_word_context": 202.4239, "num_word_doc": 49.8959, "num_word_query": 32.0698, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3846.4908, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3022, "query_norm": 1.3892, "queue_k_norm": 1.4426, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4796, "sent_len_1": 66.8774, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.5188, "stdk": 0.0483, "stdq": 0.0448, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1892, "doc_norm": 1.4427, "encoder_q-embeddings": 1382.9269, "encoder_q-layer.0": 918.0543, "encoder_q-layer.1": 1023.9297, "encoder_q-layer.10": 1265.3398, "encoder_q-layer.11": 2816.4385, "encoder_q-layer.2": 1145.8744, "encoder_q-layer.3": 1259.7012, "encoder_q-layer.4": 1354.2361, "encoder_q-layer.5": 1369.2993, "encoder_q-layer.6": 1441.4465, "encoder_q-layer.7": 1460.3331, "encoder_q-layer.8": 1517.8967, "encoder_q-layer.9": 1247.303, "epoch": 0.5, "inbatch_neg_score": 0.3073, "inbatch_pos_score": 0.9819, "learning_rate": 2.7000000000000002e-05, "loss": 3.1892, "norm_diff": 0.0518, "norm_loss": 0.0, "num_token_doc": 66.6725, "num_token_overlap": 15.7363, "num_token_query": 42.2097, "num_token_union": 68.4371, "num_word_context": 202.4152, "num_word_doc": 49.7729, "num_word_query": 31.8773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2188.9087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3071, "query_norm": 1.3909, "queue_k_norm": 1.4433, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2097, "sent_len_1": 66.6725, "sent_len_max_0": 127.995, "sent_len_max_1": 188.22, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 52.3438, "active_queue_size": 16384.0, "cl_loss": 3.169, "doc_norm": 1.4431, "encoder_q-embeddings": 1116.1664, "encoder_q-layer.0": 741.9341, "encoder_q-layer.1": 783.2296, "encoder_q-layer.10": 1255.8185, "encoder_q-layer.11": 2676.0315, "encoder_q-layer.2": 897.2774, "encoder_q-layer.3": 841.2483, "encoder_q-layer.4": 895.618, "encoder_q-layer.5": 908.3717, "encoder_q-layer.6": 1017.9759, "encoder_q-layer.7": 1194.615, "encoder_q-layer.8": 1418.7793, "encoder_q-layer.9": 1238.7992, "epoch": 0.5, "inbatch_neg_score": 0.3015, "inbatch_pos_score": 0.9526, "learning_rate": 2.6944444444444445e-05, "loss": 3.169, "norm_diff": 0.0595, "norm_loss": 0.0, "num_token_doc": 66.6688, "num_token_overlap": 15.8848, "num_token_query": 42.546, "num_token_union": 68.5401, "num_word_context": 202.5894, "num_word_doc": 49.8028, "num_word_query": 32.1483, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1862.7202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3008, "query_norm": 1.3836, "queue_k_norm": 1.4434, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.546, "sent_len_1": 66.6688, "sent_len_max_0": 128.0, "sent_len_max_1": 186.375, "stdk": 0.0482, "stdq": 0.0447, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1665, "doc_norm": 1.44, "encoder_q-embeddings": 2122.5513, "encoder_q-layer.0": 1449.4647, "encoder_q-layer.1": 1685.6858, "encoder_q-layer.10": 1287.1824, "encoder_q-layer.11": 2745.5139, "encoder_q-layer.2": 2033.5457, "encoder_q-layer.3": 2243.4026, "encoder_q-layer.4": 2445.6545, "encoder_q-layer.5": 2489.6162, "encoder_q-layer.6": 2822.689, "encoder_q-layer.7": 2603.6934, "encoder_q-layer.8": 2529.9829, "encoder_q-layer.9": 1417.3434, "epoch": 0.5, "inbatch_neg_score": 0.3002, "inbatch_pos_score": 0.9619, "learning_rate": 2.688888888888889e-05, "loss": 3.1665, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.5802, "num_token_overlap": 15.8425, "num_token_query": 42.4794, "num_token_union": 68.4302, "num_word_context": 202.3889, "num_word_doc": 49.6987, "num_word_query": 32.1145, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3261.6764, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3, "query_norm": 1.3916, "queue_k_norm": 1.4438, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4794, "sent_len_1": 66.5802, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.345, "stdk": 0.0481, "stdq": 0.0449, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1786, "doc_norm": 1.4458, "encoder_q-embeddings": 1086.301, "encoder_q-layer.0": 686.4348, "encoder_q-layer.1": 728.3462, "encoder_q-layer.10": 1170.5334, "encoder_q-layer.11": 2662.0559, "encoder_q-layer.2": 819.1372, "encoder_q-layer.3": 834.0067, "encoder_q-layer.4": 902.3559, "encoder_q-layer.5": 953.498, "encoder_q-layer.6": 983.1575, "encoder_q-layer.7": 1098.5117, "encoder_q-layer.8": 1244.496, "encoder_q-layer.9": 1173.5304, "epoch": 0.5, "inbatch_neg_score": 0.2985, "inbatch_pos_score": 0.9697, "learning_rate": 2.6833333333333333e-05, "loss": 3.1786, "norm_diff": 0.0522, "norm_loss": 0.0, "num_token_doc": 66.584, "num_token_overlap": 15.8426, "num_token_query": 42.4212, "num_token_union": 68.3439, "num_word_context": 201.8579, "num_word_doc": 49.6466, "num_word_query": 32.0062, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1812.7455, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2974, "query_norm": 1.3937, "queue_k_norm": 1.4455, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4212, "sent_len_1": 66.584, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.2237, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1845, "doc_norm": 1.4479, "encoder_q-embeddings": 1187.3259, "encoder_q-layer.0": 783.03, "encoder_q-layer.1": 885.665, "encoder_q-layer.10": 1230.0486, "encoder_q-layer.11": 2739.1226, "encoder_q-layer.2": 990.6207, "encoder_q-layer.3": 938.049, "encoder_q-layer.4": 926.2454, "encoder_q-layer.5": 904.9713, "encoder_q-layer.6": 1008.839, "encoder_q-layer.7": 1114.5236, "encoder_q-layer.8": 1351.2769, "encoder_q-layer.9": 1199.9124, "epoch": 0.51, "inbatch_neg_score": 0.305, "inbatch_pos_score": 0.9712, "learning_rate": 2.677777777777778e-05, "loss": 3.1845, "norm_diff": 0.0445, "norm_loss": 0.0, "num_token_doc": 66.7217, "num_token_overlap": 15.7407, "num_token_query": 42.1384, "num_token_union": 68.4411, "num_word_context": 202.0591, "num_word_doc": 49.7822, "num_word_query": 31.8389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1877.2533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3042, "query_norm": 1.4034, "queue_k_norm": 1.4456, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1384, "sent_len_1": 66.7217, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.095, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.192, "doc_norm": 1.4393, "encoder_q-embeddings": 1254.0648, "encoder_q-layer.0": 807.8616, "encoder_q-layer.1": 872.3541, "encoder_q-layer.10": 1252.4436, "encoder_q-layer.11": 2821.804, "encoder_q-layer.2": 998.3353, "encoder_q-layer.3": 1081.136, "encoder_q-layer.4": 1075.5345, "encoder_q-layer.5": 1062.1636, "encoder_q-layer.6": 1116.1412, "encoder_q-layer.7": 1238.8721, "encoder_q-layer.8": 1357.9669, "encoder_q-layer.9": 1224.3094, "epoch": 0.51, "inbatch_neg_score": 0.3025, "inbatch_pos_score": 0.9658, "learning_rate": 2.6722222222222228e-05, "loss": 3.192, "norm_diff": 0.0292, "norm_loss": 0.0, "num_token_doc": 66.834, "num_token_overlap": 15.8312, "num_token_query": 42.2463, "num_token_union": 68.5451, "num_word_context": 202.4792, "num_word_doc": 49.8963, "num_word_query": 31.921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2004.3602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3015, "query_norm": 1.4101, "queue_k_norm": 1.4456, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2463, "sent_len_1": 66.834, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4013, "stdk": 0.048, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1916, "doc_norm": 1.445, "encoder_q-embeddings": 1205.1147, "encoder_q-layer.0": 837.1391, "encoder_q-layer.1": 941.3704, "encoder_q-layer.10": 1194.8447, "encoder_q-layer.11": 2635.3694, "encoder_q-layer.2": 1036.8894, "encoder_q-layer.3": 1055.5786, "encoder_q-layer.4": 1148.5674, "encoder_q-layer.5": 1098.1058, "encoder_q-layer.6": 1152.6831, "encoder_q-layer.7": 1172.3553, "encoder_q-layer.8": 1231.874, "encoder_q-layer.9": 1129.0634, "epoch": 0.51, "inbatch_neg_score": 0.3014, "inbatch_pos_score": 0.9585, "learning_rate": 2.6666666666666667e-05, "loss": 3.1916, "norm_diff": 0.0553, "norm_loss": 0.0, "num_token_doc": 67.0722, "num_token_overlap": 15.8303, "num_token_query": 42.2997, "num_token_union": 68.6323, "num_word_context": 202.5535, "num_word_doc": 49.9793, "num_word_query": 31.9334, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1937.2522, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3005, "query_norm": 1.3898, "queue_k_norm": 1.4448, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2997, "sent_len_1": 67.0722, "sent_len_max_0": 127.9838, "sent_len_max_1": 192.5275, "stdk": 0.0482, "stdq": 0.0446, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1756, "doc_norm": 1.4546, "encoder_q-embeddings": 1337.1748, "encoder_q-layer.0": 905.8925, "encoder_q-layer.1": 959.7419, "encoder_q-layer.10": 1151.574, "encoder_q-layer.11": 2584.4424, "encoder_q-layer.2": 1086.6041, "encoder_q-layer.3": 1052.4656, "encoder_q-layer.4": 1077.1813, "encoder_q-layer.5": 1082.1497, "encoder_q-layer.6": 1123.2332, "encoder_q-layer.7": 1221.7411, "encoder_q-layer.8": 1364.6788, "encoder_q-layer.9": 1165.0773, "epoch": 0.51, "inbatch_neg_score": 0.3048, "inbatch_pos_score": 0.9893, "learning_rate": 2.6611111111111116e-05, "loss": 3.1756, "norm_diff": 0.034, "norm_loss": 0.0, "num_token_doc": 66.779, "num_token_overlap": 15.8526, "num_token_query": 42.3634, "num_token_union": 68.4795, "num_word_context": 202.266, "num_word_doc": 49.8188, "num_word_query": 32.0178, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1973.948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3049, "query_norm": 1.4206, "queue_k_norm": 1.4468, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3634, "sent_len_1": 66.779, "sent_len_max_0": 128.0, "sent_len_max_1": 187.8275, "stdk": 0.0486, "stdq": 0.0457, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.1796, "doc_norm": 1.444, "encoder_q-embeddings": 1420.6287, "encoder_q-layer.0": 974.8321, "encoder_q-layer.1": 1064.8833, "encoder_q-layer.10": 1125.1486, "encoder_q-layer.11": 2645.207, "encoder_q-layer.2": 1213.2821, "encoder_q-layer.3": 1290.0647, "encoder_q-layer.4": 1380.8856, "encoder_q-layer.5": 1511.656, "encoder_q-layer.6": 1455.9926, "encoder_q-layer.7": 1516.9437, "encoder_q-layer.8": 1853.8801, "encoder_q-layer.9": 1381.0068, "epoch": 0.51, "inbatch_neg_score": 0.3045, "inbatch_pos_score": 0.9717, "learning_rate": 2.6555555555555555e-05, "loss": 3.1796, "norm_diff": 0.0401, "norm_loss": 0.0, "num_token_doc": 66.6933, "num_token_overlap": 15.8112, "num_token_query": 42.3467, "num_token_union": 68.4904, "num_word_context": 202.3609, "num_word_doc": 49.8237, "num_word_query": 32.0047, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2270.0135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3032, "query_norm": 1.4039, "queue_k_norm": 1.4458, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3467, "sent_len_1": 66.6933, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9675, "stdk": 0.0482, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1841, "doc_norm": 1.4481, "encoder_q-embeddings": 2626.1616, "encoder_q-layer.0": 1893.5331, "encoder_q-layer.1": 2078.5042, "encoder_q-layer.10": 1164.785, "encoder_q-layer.11": 2681.4736, "encoder_q-layer.2": 2294.8767, "encoder_q-layer.3": 2133.2266, "encoder_q-layer.4": 1988.1434, "encoder_q-layer.5": 1826.6328, "encoder_q-layer.6": 1993.291, "encoder_q-layer.7": 1711.479, "encoder_q-layer.8": 2053.0862, "encoder_q-layer.9": 1521.3424, "epoch": 0.51, "inbatch_neg_score": 0.3071, "inbatch_pos_score": 0.9946, "learning_rate": 2.6500000000000004e-05, "loss": 3.1841, "norm_diff": 0.0233, "norm_loss": 0.0, "num_token_doc": 66.929, "num_token_overlap": 15.8199, "num_token_query": 42.3258, "num_token_union": 68.5326, "num_word_context": 202.4508, "num_word_doc": 49.9079, "num_word_query": 31.9552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3125.3315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3066, "query_norm": 1.4269, "queue_k_norm": 1.4452, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3258, "sent_len_1": 66.929, "sent_len_max_0": 128.0, "sent_len_max_1": 192.85, "stdk": 0.0483, "stdq": 0.046, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.1659, "doc_norm": 1.4445, "encoder_q-embeddings": 1149.1639, "encoder_q-layer.0": 782.3029, "encoder_q-layer.1": 855.3306, "encoder_q-layer.10": 1141.5927, "encoder_q-layer.11": 2635.874, "encoder_q-layer.2": 960.0889, "encoder_q-layer.3": 999.1879, "encoder_q-layer.4": 1055.1094, "encoder_q-layer.5": 1097.5433, "encoder_q-layer.6": 1305.9354, "encoder_q-layer.7": 1347.384, "encoder_q-layer.8": 1471.9419, "encoder_q-layer.9": 1226.5651, "epoch": 0.51, "inbatch_neg_score": 0.3073, "inbatch_pos_score": 0.9976, "learning_rate": 2.6444444444444443e-05, "loss": 3.1659, "norm_diff": 0.057, "norm_loss": 0.0, "num_token_doc": 66.6399, "num_token_overlap": 15.8608, "num_token_query": 42.4153, "num_token_union": 68.4471, "num_word_context": 201.9269, "num_word_doc": 49.7175, "num_word_query": 32.0599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1960.9048, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3079, "query_norm": 1.3875, "queue_k_norm": 1.4457, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4153, "sent_len_1": 66.6399, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4013, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1725, "doc_norm": 1.4524, "encoder_q-embeddings": 1807.2876, "encoder_q-layer.0": 1209.4701, "encoder_q-layer.1": 1234.6213, "encoder_q-layer.10": 1228.0586, "encoder_q-layer.11": 2669.0503, "encoder_q-layer.2": 1384.2013, "encoder_q-layer.3": 1403.5226, "encoder_q-layer.4": 1445.6669, "encoder_q-layer.5": 1481.6613, "encoder_q-layer.6": 1555.8969, "encoder_q-layer.7": 1865.5817, "encoder_q-layer.8": 2353.4629, "encoder_q-layer.9": 1553.4701, "epoch": 0.51, "inbatch_neg_score": 0.3072, "inbatch_pos_score": 0.9941, "learning_rate": 2.6388888888888892e-05, "loss": 3.1725, "norm_diff": 0.0458, "norm_loss": 0.0, "num_token_doc": 66.9042, "num_token_overlap": 15.9573, "num_token_query": 42.559, "num_token_union": 68.5646, "num_word_context": 202.8128, "num_word_doc": 49.9423, "num_word_query": 32.1623, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2586.5792, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3079, "query_norm": 1.4066, "queue_k_norm": 1.4461, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.559, "sent_len_1": 66.9042, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4863, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.1736, "doc_norm": 1.4494, "encoder_q-embeddings": 1673.3983, "encoder_q-layer.0": 1117.4065, "encoder_q-layer.1": 1259.0721, "encoder_q-layer.10": 1341.9236, "encoder_q-layer.11": 2633.927, "encoder_q-layer.2": 1491.9813, "encoder_q-layer.3": 1644.0735, "encoder_q-layer.4": 1825.2837, "encoder_q-layer.5": 1843.7219, "encoder_q-layer.6": 1541.1586, "encoder_q-layer.7": 1426.5707, "encoder_q-layer.8": 1344.043, "encoder_q-layer.9": 1161.5409, "epoch": 0.51, "inbatch_neg_score": 0.3078, "inbatch_pos_score": 0.9653, "learning_rate": 2.633333333333333e-05, "loss": 3.1736, "norm_diff": 0.0564, "norm_loss": 0.0, "num_token_doc": 66.5736, "num_token_overlap": 15.7709, "num_token_query": 42.2994, "num_token_union": 68.4101, "num_word_context": 202.104, "num_word_doc": 49.6574, "num_word_query": 31.9523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2370.948, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3076, "query_norm": 1.3929, "queue_k_norm": 1.4477, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2994, "sent_len_1": 66.5736, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4725, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1548, "doc_norm": 1.4521, "encoder_q-embeddings": 1132.8837, "encoder_q-layer.0": 733.9807, "encoder_q-layer.1": 777.4813, "encoder_q-layer.10": 1248.1506, "encoder_q-layer.11": 2794.6165, "encoder_q-layer.2": 882.4829, "encoder_q-layer.3": 905.8488, "encoder_q-layer.4": 988.0999, "encoder_q-layer.5": 993.4087, "encoder_q-layer.6": 1078.0599, "encoder_q-layer.7": 1148.6647, "encoder_q-layer.8": 1388.6261, "encoder_q-layer.9": 1242.256, "epoch": 0.51, "inbatch_neg_score": 0.3105, "inbatch_pos_score": 0.9888, "learning_rate": 2.627777777777778e-05, "loss": 3.1548, "norm_diff": 0.0466, "norm_loss": 0.0, "num_token_doc": 66.9349, "num_token_overlap": 15.8567, "num_token_query": 42.3626, "num_token_union": 68.6166, "num_word_context": 202.4998, "num_word_doc": 49.9976, "num_word_query": 32.0067, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1914.1392, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3118, "query_norm": 1.4054, "queue_k_norm": 1.446, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3626, "sent_len_1": 66.9349, "sent_len_max_0": 128.0, "sent_len_max_1": 187.72, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.1842, "doc_norm": 1.4509, "encoder_q-embeddings": 2665.6877, "encoder_q-layer.0": 1751.968, "encoder_q-layer.1": 1909.3588, "encoder_q-layer.10": 2678.1211, "encoder_q-layer.11": 5720.9937, "encoder_q-layer.2": 2206.8611, "encoder_q-layer.3": 2393.7104, "encoder_q-layer.4": 2565.1802, "encoder_q-layer.5": 2689.7844, "encoder_q-layer.6": 2947.4419, "encoder_q-layer.7": 2993.7354, "encoder_q-layer.8": 3418.1716, "encoder_q-layer.9": 2840.96, "epoch": 0.52, "inbatch_neg_score": 0.3212, "inbatch_pos_score": 0.9834, "learning_rate": 2.6222222222222226e-05, "loss": 3.1842, "norm_diff": 0.0473, "norm_loss": 0.0, "num_token_doc": 66.716, "num_token_overlap": 15.8108, "num_token_query": 42.3335, "num_token_union": 68.4402, "num_word_context": 202.312, "num_word_doc": 49.747, "num_word_query": 31.9164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4398.4348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3218, "query_norm": 1.4036, "queue_k_norm": 1.4479, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3335, "sent_len_1": 66.716, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3562, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1811, "doc_norm": 1.4487, "encoder_q-embeddings": 1614.9462, "encoder_q-layer.0": 1073.2422, "encoder_q-layer.1": 1263.3043, "encoder_q-layer.10": 1190.395, "encoder_q-layer.11": 2852.3901, "encoder_q-layer.2": 1504.4335, "encoder_q-layer.3": 1482.9572, "encoder_q-layer.4": 1539.9656, "encoder_q-layer.5": 1407.9786, "encoder_q-layer.6": 1403.9167, "encoder_q-layer.7": 1402.7869, "encoder_q-layer.8": 1517.6372, "encoder_q-layer.9": 1223.2373, "epoch": 0.52, "inbatch_neg_score": 0.32, "inbatch_pos_score": 0.9829, "learning_rate": 2.6166666666666668e-05, "loss": 3.1811, "norm_diff": 0.0647, "norm_loss": 0.0, "num_token_doc": 66.7945, "num_token_overlap": 15.7717, "num_token_query": 42.2734, "num_token_union": 68.5108, "num_word_context": 202.5168, "num_word_doc": 49.8515, "num_word_query": 31.9541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2329.1899, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3201, "query_norm": 1.384, "queue_k_norm": 1.4501, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2734, "sent_len_1": 66.7945, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.0888, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1999, "doc_norm": 1.4528, "encoder_q-embeddings": 1172.6827, "encoder_q-layer.0": 750.8718, "encoder_q-layer.1": 819.6752, "encoder_q-layer.10": 1217.55, "encoder_q-layer.11": 2726.4016, "encoder_q-layer.2": 929.6371, "encoder_q-layer.3": 996.9476, "encoder_q-layer.4": 1068.8159, "encoder_q-layer.5": 1161.1394, "encoder_q-layer.6": 1134.3975, "encoder_q-layer.7": 1256.9518, "encoder_q-layer.8": 1395.1416, "encoder_q-layer.9": 1188.2344, "epoch": 0.52, "inbatch_neg_score": 0.3306, "inbatch_pos_score": 1.0107, "learning_rate": 2.6111111111111114e-05, "loss": 3.1999, "norm_diff": 0.0516, "norm_loss": 0.0, "num_token_doc": 66.8164, "num_token_overlap": 15.7407, "num_token_query": 42.1408, "num_token_union": 68.4197, "num_word_context": 202.1461, "num_word_doc": 49.788, "num_word_query": 31.799, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1944.5269, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3293, "query_norm": 1.4012, "queue_k_norm": 1.4504, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1408, "sent_len_1": 66.8164, "sent_len_max_0": 127.9988, "sent_len_max_1": 192.03, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1902, "doc_norm": 1.4503, "encoder_q-embeddings": 1409.8485, "encoder_q-layer.0": 961.9581, "encoder_q-layer.1": 1064.7609, "encoder_q-layer.10": 1145.7676, "encoder_q-layer.11": 2622.2678, "encoder_q-layer.2": 1218.625, "encoder_q-layer.3": 1246.5938, "encoder_q-layer.4": 1396.5654, "encoder_q-layer.5": 1428.3429, "encoder_q-layer.6": 1416.1289, "encoder_q-layer.7": 1431.8171, "encoder_q-layer.8": 1341.1672, "encoder_q-layer.9": 1132.931, "epoch": 0.52, "inbatch_neg_score": 0.3195, "inbatch_pos_score": 1.0068, "learning_rate": 2.6055555555555556e-05, "loss": 3.1902, "norm_diff": 0.0442, "norm_loss": 0.0, "num_token_doc": 66.5385, "num_token_overlap": 15.8071, "num_token_query": 42.414, "num_token_union": 68.4216, "num_word_context": 202.1359, "num_word_doc": 49.6384, "num_word_query": 32.0273, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2115.0595, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3203, "query_norm": 1.4061, "queue_k_norm": 1.4499, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.414, "sent_len_1": 66.5385, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.3262, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 3.1959, "doc_norm": 1.4516, "encoder_q-embeddings": 1152.1466, "encoder_q-layer.0": 738.6297, "encoder_q-layer.1": 792.07, "encoder_q-layer.10": 1298.8303, "encoder_q-layer.11": 2744.3289, "encoder_q-layer.2": 919.8964, "encoder_q-layer.3": 971.2113, "encoder_q-layer.4": 998.7717, "encoder_q-layer.5": 1029.5464, "encoder_q-layer.6": 1170.1462, "encoder_q-layer.7": 1286.2574, "encoder_q-layer.8": 1432.5447, "encoder_q-layer.9": 1258.1714, "epoch": 0.52, "inbatch_neg_score": 0.3184, "inbatch_pos_score": 1.0078, "learning_rate": 2.6000000000000002e-05, "loss": 3.1959, "norm_diff": 0.0702, "norm_loss": 0.0, "num_token_doc": 66.6409, "num_token_overlap": 15.7604, "num_token_query": 42.2335, "num_token_union": 68.3678, "num_word_context": 201.9214, "num_word_doc": 49.6841, "num_word_query": 31.8814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1924.3169, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3193, "query_norm": 1.3814, "queue_k_norm": 1.4503, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2335, "sent_len_1": 66.6409, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7912, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.2075, "doc_norm": 1.4493, "encoder_q-embeddings": 1320.2448, "encoder_q-layer.0": 899.9108, "encoder_q-layer.1": 982.3687, "encoder_q-layer.10": 1284.1084, "encoder_q-layer.11": 2825.8447, "encoder_q-layer.2": 1070.3234, "encoder_q-layer.3": 1159.3943, "encoder_q-layer.4": 1213.0487, "encoder_q-layer.5": 1094.3164, "encoder_q-layer.6": 1196.2073, "encoder_q-layer.7": 1215.4895, "encoder_q-layer.8": 1250.9292, "encoder_q-layer.9": 1163.6772, "epoch": 0.52, "inbatch_neg_score": 0.3227, "inbatch_pos_score": 0.9722, "learning_rate": 2.5944444444444444e-05, "loss": 3.2075, "norm_diff": 0.0577, "norm_loss": 0.0, "num_token_doc": 66.8912, "num_token_overlap": 15.7676, "num_token_query": 42.2705, "num_token_union": 68.6036, "num_word_context": 202.3563, "num_word_doc": 49.8672, "num_word_query": 31.9182, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2012.526, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3235, "query_norm": 1.3916, "queue_k_norm": 1.4502, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2705, "sent_len_1": 66.8912, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1138, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1837, "doc_norm": 1.46, "encoder_q-embeddings": 1501.9418, "encoder_q-layer.0": 955.5685, "encoder_q-layer.1": 1026.5605, "encoder_q-layer.10": 1267.4838, "encoder_q-layer.11": 2891.1323, "encoder_q-layer.2": 1182.8987, "encoder_q-layer.3": 1285.0691, "encoder_q-layer.4": 1398.8546, "encoder_q-layer.5": 1445.2407, "encoder_q-layer.6": 1510.9875, "encoder_q-layer.7": 1704.5664, "encoder_q-layer.8": 1557.0699, "encoder_q-layer.9": 1329.6307, "epoch": 0.52, "inbatch_neg_score": 0.3233, "inbatch_pos_score": 1.0117, "learning_rate": 2.588888888888889e-05, "loss": 3.1837, "norm_diff": 0.071, "norm_loss": 0.0, "num_token_doc": 66.6608, "num_token_overlap": 15.7861, "num_token_query": 42.2824, "num_token_union": 68.4637, "num_word_context": 202.4243, "num_word_doc": 49.7786, "num_word_query": 31.962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2263.6005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3235, "query_norm": 1.3889, "queue_k_norm": 1.4508, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2824, "sent_len_1": 66.6608, "sent_len_max_0": 127.99, "sent_len_max_1": 190.5813, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1602, "doc_norm": 1.4569, "encoder_q-embeddings": 1760.8475, "encoder_q-layer.0": 1157.1025, "encoder_q-layer.1": 1322.1692, "encoder_q-layer.10": 1184.7972, "encoder_q-layer.11": 2648.7246, "encoder_q-layer.2": 1513.8989, "encoder_q-layer.3": 1670.3378, "encoder_q-layer.4": 1593.3119, "encoder_q-layer.5": 1748.5997, "encoder_q-layer.6": 2143.1968, "encoder_q-layer.7": 2305.3049, "encoder_q-layer.8": 2642.4973, "encoder_q-layer.9": 1784.7333, "epoch": 0.52, "inbatch_neg_score": 0.3232, "inbatch_pos_score": 1.0039, "learning_rate": 2.5833333333333336e-05, "loss": 3.1602, "norm_diff": 0.067, "norm_loss": 0.0, "num_token_doc": 66.8629, "num_token_overlap": 15.9013, "num_token_query": 42.3842, "num_token_union": 68.5474, "num_word_context": 202.0249, "num_word_doc": 49.8612, "num_word_query": 32.0009, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2817.7918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3245, "query_norm": 1.3899, "queue_k_norm": 1.4531, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3842, "sent_len_1": 66.8629, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4412, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1568, "doc_norm": 1.4531, "encoder_q-embeddings": 1208.8646, "encoder_q-layer.0": 805.903, "encoder_q-layer.1": 915.6286, "encoder_q-layer.10": 1158.6066, "encoder_q-layer.11": 2721.3188, "encoder_q-layer.2": 1029.7452, "encoder_q-layer.3": 1069.8293, "encoder_q-layer.4": 1114.7445, "encoder_q-layer.5": 1129.1354, "encoder_q-layer.6": 1286.8302, "encoder_q-layer.7": 1421.4355, "encoder_q-layer.8": 1417.2062, "encoder_q-layer.9": 1182.0935, "epoch": 0.52, "inbatch_neg_score": 0.3142, "inbatch_pos_score": 0.9868, "learning_rate": 2.5777777777777778e-05, "loss": 3.1568, "norm_diff": 0.0435, "norm_loss": 0.0, "num_token_doc": 66.7767, "num_token_overlap": 15.8213, "num_token_query": 42.2795, "num_token_union": 68.4497, "num_word_context": 202.129, "num_word_doc": 49.8774, "num_word_query": 31.9279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1978.823, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.314, "query_norm": 1.4096, "queue_k_norm": 1.4518, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2795, "sent_len_1": 66.7767, "sent_len_max_0": 128.0, "sent_len_max_1": 189.515, "stdk": 0.0483, "stdq": 0.0458, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.173, "doc_norm": 1.4474, "encoder_q-embeddings": 1426.0765, "encoder_q-layer.0": 935.6581, "encoder_q-layer.1": 1050.7383, "encoder_q-layer.10": 1247.2058, "encoder_q-layer.11": 2854.8591, "encoder_q-layer.2": 1215.0834, "encoder_q-layer.3": 1315.4484, "encoder_q-layer.4": 1469.4913, "encoder_q-layer.5": 1552.0447, "encoder_q-layer.6": 1621.0708, "encoder_q-layer.7": 1667.5619, "encoder_q-layer.8": 1789.7327, "encoder_q-layer.9": 1374.7142, "epoch": 0.52, "inbatch_neg_score": 0.3252, "inbatch_pos_score": 0.9844, "learning_rate": 2.5722222222222224e-05, "loss": 3.173, "norm_diff": 0.068, "norm_loss": 0.0, "num_token_doc": 66.5837, "num_token_overlap": 15.8161, "num_token_query": 42.2675, "num_token_union": 68.2872, "num_word_context": 202.1326, "num_word_doc": 49.679, "num_word_query": 31.9317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2335.3767, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.325, "query_norm": 1.3795, "queue_k_norm": 1.4533, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2675, "sent_len_1": 66.5837, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5312, "stdk": 0.0481, "stdq": 0.0443, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.1685, "doc_norm": 1.4503, "encoder_q-embeddings": 1729.5464, "encoder_q-layer.0": 1213.62, "encoder_q-layer.1": 1479.3033, "encoder_q-layer.10": 1184.985, "encoder_q-layer.11": 2655.5107, "encoder_q-layer.2": 1989.9364, "encoder_q-layer.3": 2206.7095, "encoder_q-layer.4": 1812.83, "encoder_q-layer.5": 1494.6284, "encoder_q-layer.6": 1528.6917, "encoder_q-layer.7": 1482.2549, "encoder_q-layer.8": 1422.2734, "encoder_q-layer.9": 1155.713, "epoch": 0.53, "inbatch_neg_score": 0.322, "inbatch_pos_score": 0.9844, "learning_rate": 2.5666666666666666e-05, "loss": 3.1685, "norm_diff": 0.0571, "norm_loss": 0.0, "num_token_doc": 66.5164, "num_token_overlap": 15.8691, "num_token_query": 42.3756, "num_token_union": 68.4046, "num_word_context": 201.9633, "num_word_doc": 49.6562, "num_word_query": 31.9822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2562.6134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.321, "query_norm": 1.3932, "queue_k_norm": 1.4549, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3756, "sent_len_1": 66.5164, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5687, "stdk": 0.0482, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1672, "doc_norm": 1.4547, "encoder_q-embeddings": 1174.433, "encoder_q-layer.0": 854.6725, "encoder_q-layer.1": 869.926, "encoder_q-layer.10": 1122.2318, "encoder_q-layer.11": 2527.2104, "encoder_q-layer.2": 1009.4871, "encoder_q-layer.3": 1051.9095, "encoder_q-layer.4": 1072.4891, "encoder_q-layer.5": 1077.2926, "encoder_q-layer.6": 1125.3263, "encoder_q-layer.7": 1125.2993, "encoder_q-layer.8": 1228.5206, "encoder_q-layer.9": 1090.1893, "epoch": 0.53, "inbatch_neg_score": 0.3189, "inbatch_pos_score": 0.9951, "learning_rate": 2.5611111111111115e-05, "loss": 3.1672, "norm_diff": 0.0573, "norm_loss": 0.0, "num_token_doc": 66.676, "num_token_overlap": 15.8737, "num_token_query": 42.367, "num_token_union": 68.3841, "num_word_context": 202.1768, "num_word_doc": 49.7607, "num_word_query": 31.9981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1858.7298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3174, "query_norm": 1.3974, "queue_k_norm": 1.452, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.367, "sent_len_1": 66.676, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4462, "stdk": 0.0483, "stdq": 0.0453, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1805, "doc_norm": 1.456, "encoder_q-embeddings": 1117.0935, "encoder_q-layer.0": 720.3044, "encoder_q-layer.1": 773.582, "encoder_q-layer.10": 1165.9246, "encoder_q-layer.11": 2837.1711, "encoder_q-layer.2": 879.5619, "encoder_q-layer.3": 927.4734, "encoder_q-layer.4": 984.7314, "encoder_q-layer.5": 1009.3533, "encoder_q-layer.6": 1136.6846, "encoder_q-layer.7": 1246.926, "encoder_q-layer.8": 1308.672, "encoder_q-layer.9": 1193.0277, "epoch": 0.53, "inbatch_neg_score": 0.3177, "inbatch_pos_score": 0.9995, "learning_rate": 2.5555555555555554e-05, "loss": 3.1805, "norm_diff": 0.0605, "norm_loss": 0.0, "num_token_doc": 66.7383, "num_token_overlap": 15.8225, "num_token_query": 42.3905, "num_token_union": 68.501, "num_word_context": 202.1893, "num_word_doc": 49.7992, "num_word_query": 32.0212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1922.9616, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3193, "query_norm": 1.3955, "queue_k_norm": 1.453, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3905, "sent_len_1": 66.7383, "sent_len_max_0": 127.9838, "sent_len_max_1": 190.0625, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1758, "doc_norm": 1.4562, "encoder_q-embeddings": 1319.0168, "encoder_q-layer.0": 897.9144, "encoder_q-layer.1": 1048.3152, "encoder_q-layer.10": 1259.9088, "encoder_q-layer.11": 2716.0413, "encoder_q-layer.2": 1237.2465, "encoder_q-layer.3": 1158.7897, "encoder_q-layer.4": 1189.6719, "encoder_q-layer.5": 1164.8951, "encoder_q-layer.6": 1204.968, "encoder_q-layer.7": 1247.7246, "encoder_q-layer.8": 1351.0017, "encoder_q-layer.9": 1140.6069, "epoch": 0.53, "inbatch_neg_score": 0.3206, "inbatch_pos_score": 0.9946, "learning_rate": 2.5500000000000003e-05, "loss": 3.1758, "norm_diff": 0.0719, "norm_loss": 0.0, "num_token_doc": 66.6629, "num_token_overlap": 15.9017, "num_token_query": 42.4654, "num_token_union": 68.4104, "num_word_context": 202.2426, "num_word_doc": 49.7928, "num_word_query": 32.0814, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.6012, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3196, "query_norm": 1.3843, "queue_k_norm": 1.454, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4654, "sent_len_1": 66.6629, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2537, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1701, "doc_norm": 1.4547, "encoder_q-embeddings": 1076.8119, "encoder_q-layer.0": 693.8314, "encoder_q-layer.1": 765.2374, "encoder_q-layer.10": 1183.3683, "encoder_q-layer.11": 2749.5518, "encoder_q-layer.2": 874.5239, "encoder_q-layer.3": 915.3395, "encoder_q-layer.4": 999.7963, "encoder_q-layer.5": 995.7821, "encoder_q-layer.6": 1094.0566, "encoder_q-layer.7": 1107.7854, "encoder_q-layer.8": 1276.2131, "encoder_q-layer.9": 1147.4576, "epoch": 0.53, "inbatch_neg_score": 0.3154, "inbatch_pos_score": 0.9585, "learning_rate": 2.5444444444444442e-05, "loss": 3.1701, "norm_diff": 0.0874, "norm_loss": 0.0, "num_token_doc": 66.7806, "num_token_overlap": 15.8095, "num_token_query": 42.3156, "num_token_union": 68.5219, "num_word_context": 202.3311, "num_word_doc": 49.8548, "num_word_query": 31.9782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1846.6835, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3154, "query_norm": 1.3674, "queue_k_norm": 1.453, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3156, "sent_len_1": 66.7806, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.035, "stdk": 0.0484, "stdq": 0.044, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.1708, "doc_norm": 1.4567, "encoder_q-embeddings": 1618.754, "encoder_q-layer.0": 1072.6969, "encoder_q-layer.1": 1158.3682, "encoder_q-layer.10": 1297.1575, "encoder_q-layer.11": 2824.1653, "encoder_q-layer.2": 1244.108, "encoder_q-layer.3": 1307.9709, "encoder_q-layer.4": 1285.0356, "encoder_q-layer.5": 1260.4797, "encoder_q-layer.6": 1342.7959, "encoder_q-layer.7": 1366.6479, "encoder_q-layer.8": 1439.3108, "encoder_q-layer.9": 1215.3129, "epoch": 0.53, "inbatch_neg_score": 0.3158, "inbatch_pos_score": 1.001, "learning_rate": 2.538888888888889e-05, "loss": 3.1708, "norm_diff": 0.0696, "norm_loss": 0.0, "num_token_doc": 66.7243, "num_token_overlap": 15.8032, "num_token_query": 42.3101, "num_token_union": 68.438, "num_word_context": 202.332, "num_word_doc": 49.7375, "num_word_query": 31.9379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2224.0135, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3147, "query_norm": 1.3871, "queue_k_norm": 1.4543, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3101, "sent_len_1": 66.7243, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.2175, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1764, "doc_norm": 1.4516, "encoder_q-embeddings": 1117.7158, "encoder_q-layer.0": 688.9675, "encoder_q-layer.1": 748.155, "encoder_q-layer.10": 1131.1346, "encoder_q-layer.11": 2711.9814, "encoder_q-layer.2": 799.4025, "encoder_q-layer.3": 805.9701, "encoder_q-layer.4": 825.8371, "encoder_q-layer.5": 831.1485, "encoder_q-layer.6": 945.5338, "encoder_q-layer.7": 1067.0935, "encoder_q-layer.8": 1216.1624, "encoder_q-layer.9": 1145.6515, "epoch": 0.53, "inbatch_neg_score": 0.3146, "inbatch_pos_score": 0.9741, "learning_rate": 2.5333333333333337e-05, "loss": 3.1764, "norm_diff": 0.0681, "norm_loss": 0.0, "num_token_doc": 66.6727, "num_token_overlap": 15.8708, "num_token_query": 42.4145, "num_token_union": 68.4184, "num_word_context": 202.0531, "num_word_doc": 49.7814, "num_word_query": 32.0756, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1818.5348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.314, "query_norm": 1.3835, "queue_k_norm": 1.4537, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4145, "sent_len_1": 66.6727, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8613, "stdk": 0.0482, "stdq": 0.0448, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1707, "doc_norm": 1.4534, "encoder_q-embeddings": 1951.7289, "encoder_q-layer.0": 1403.1584, "encoder_q-layer.1": 1592.665, "encoder_q-layer.10": 1098.5659, "encoder_q-layer.11": 2601.1672, "encoder_q-layer.2": 2120.9902, "encoder_q-layer.3": 2167.7314, "encoder_q-layer.4": 2104.6357, "encoder_q-layer.5": 2093.1252, "encoder_q-layer.6": 2205.2324, "encoder_q-layer.7": 1863.1597, "encoder_q-layer.8": 1449.6635, "encoder_q-layer.9": 1135.6288, "epoch": 0.53, "inbatch_neg_score": 0.3124, "inbatch_pos_score": 0.9995, "learning_rate": 2.527777777777778e-05, "loss": 3.1707, "norm_diff": 0.0465, "norm_loss": 0.0, "num_token_doc": 66.6797, "num_token_overlap": 15.8556, "num_token_query": 42.3555, "num_token_union": 68.4239, "num_word_context": 202.2133, "num_word_doc": 49.7203, "num_word_query": 31.9865, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2826.6551, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.312, "query_norm": 1.4069, "queue_k_norm": 1.4539, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3555, "sent_len_1": 66.6797, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2587, "stdk": 0.0483, "stdq": 0.0457, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1697, "doc_norm": 1.4501, "encoder_q-embeddings": 1120.4263, "encoder_q-layer.0": 714.5756, "encoder_q-layer.1": 774.7652, "encoder_q-layer.10": 1239.441, "encoder_q-layer.11": 2742.9119, "encoder_q-layer.2": 867.8064, "encoder_q-layer.3": 943.6531, "encoder_q-layer.4": 1009.3132, "encoder_q-layer.5": 1027.4653, "encoder_q-layer.6": 1108.8074, "encoder_q-layer.7": 1179.2021, "encoder_q-layer.8": 1358.2111, "encoder_q-layer.9": 1174.0023, "epoch": 0.53, "inbatch_neg_score": 0.3118, "inbatch_pos_score": 0.9771, "learning_rate": 2.5222222222222225e-05, "loss": 3.1697, "norm_diff": 0.0468, "norm_loss": 0.0, "num_token_doc": 66.8292, "num_token_overlap": 15.8238, "num_token_query": 42.2302, "num_token_union": 68.4284, "num_word_context": 202.6306, "num_word_doc": 49.8846, "num_word_query": 31.8993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1913.255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3115, "query_norm": 1.4033, "queue_k_norm": 1.4525, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2302, "sent_len_1": 66.8292, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.6775, "stdk": 0.0482, "stdq": 0.0457, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 52.5391, "active_queue_size": 16384.0, "cl_loss": 3.1553, "doc_norm": 1.4551, "encoder_q-embeddings": 6777.5684, "encoder_q-layer.0": 4755.5601, "encoder_q-layer.1": 5112.0259, "encoder_q-layer.10": 1226.4763, "encoder_q-layer.11": 2637.8167, "encoder_q-layer.2": 5533.9478, "encoder_q-layer.3": 5333.5063, "encoder_q-layer.4": 5244.0371, "encoder_q-layer.5": 4762.6851, "encoder_q-layer.6": 4304.2544, "encoder_q-layer.7": 3300.4456, "encoder_q-layer.8": 1827.9332, "encoder_q-layer.9": 1236.8182, "epoch": 0.53, "inbatch_neg_score": 0.3104, "inbatch_pos_score": 0.9688, "learning_rate": 2.5166666666666667e-05, "loss": 3.1553, "norm_diff": 0.0595, "norm_loss": 0.0, "num_token_doc": 66.6309, "num_token_overlap": 15.7847, "num_token_query": 42.2795, "num_token_union": 68.364, "num_word_context": 201.8559, "num_word_doc": 49.6945, "num_word_query": 31.9335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6734.5104, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3113, "query_norm": 1.3956, "queue_k_norm": 1.4523, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2795, "sent_len_1": 66.6309, "sent_len_max_0": 127.9963, "sent_len_max_1": 190.5312, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.1767, "doc_norm": 1.4577, "encoder_q-embeddings": 4374.2275, "encoder_q-layer.0": 3274.2798, "encoder_q-layer.1": 3682.5596, "encoder_q-layer.10": 1165.0477, "encoder_q-layer.11": 2686.0039, "encoder_q-layer.2": 4623.0894, "encoder_q-layer.3": 5256.5547, "encoder_q-layer.4": 5083.3872, "encoder_q-layer.5": 5416.7207, "encoder_q-layer.6": 5009.9868, "encoder_q-layer.7": 3857.572, "encoder_q-layer.8": 2526.9121, "encoder_q-layer.9": 1390.3396, "epoch": 0.54, "inbatch_neg_score": 0.312, "inbatch_pos_score": 1.001, "learning_rate": 2.5111111111111113e-05, "loss": 3.1767, "norm_diff": 0.0712, "norm_loss": 0.0, "num_token_doc": 66.5663, "num_token_overlap": 15.7604, "num_token_query": 42.2924, "num_token_union": 68.3987, "num_word_context": 202.0356, "num_word_doc": 49.7013, "num_word_query": 31.9291, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5768.0141, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.312, "query_norm": 1.3865, "queue_k_norm": 1.4535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2924, "sent_len_1": 66.5663, "sent_len_max_0": 128.0, "sent_len_max_1": 186.49, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1741, "doc_norm": 1.4515, "encoder_q-embeddings": 1906.5636, "encoder_q-layer.0": 1221.5814, "encoder_q-layer.1": 1244.5568, "encoder_q-layer.10": 2184.1521, "encoder_q-layer.11": 5091.8311, "encoder_q-layer.2": 1336.4238, "encoder_q-layer.3": 1377.7184, "encoder_q-layer.4": 1494.7428, "encoder_q-layer.5": 1524.4518, "encoder_q-layer.6": 1785.6333, "encoder_q-layer.7": 1943.002, "encoder_q-layer.8": 2276.8542, "encoder_q-layer.9": 2175.3481, "epoch": 0.54, "inbatch_neg_score": 0.312, "inbatch_pos_score": 0.9854, "learning_rate": 2.5055555555555555e-05, "loss": 3.1741, "norm_diff": 0.0743, "norm_loss": 0.0, "num_token_doc": 66.6486, "num_token_overlap": 15.8368, "num_token_query": 42.2622, "num_token_union": 68.3812, "num_word_context": 202.1944, "num_word_doc": 49.7419, "num_word_query": 31.8977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3338.5608, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3123, "query_norm": 1.3772, "queue_k_norm": 1.4536, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2622, "sent_len_1": 66.6486, "sent_len_max_0": 128.0, "sent_len_max_1": 190.325, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.1777, "doc_norm": 1.4532, "encoder_q-embeddings": 4263.8979, "encoder_q-layer.0": 3006.1255, "encoder_q-layer.1": 3501.7454, "encoder_q-layer.10": 2515.7114, "encoder_q-layer.11": 5451.2246, "encoder_q-layer.2": 4117.1392, "encoder_q-layer.3": 4456.2192, "encoder_q-layer.4": 4804.2559, "encoder_q-layer.5": 5240.0342, "encoder_q-layer.6": 4914.8164, "encoder_q-layer.7": 4641.9053, "encoder_q-layer.8": 4362.4048, "encoder_q-layer.9": 3035.6741, "epoch": 0.54, "inbatch_neg_score": 0.3113, "inbatch_pos_score": 0.9819, "learning_rate": 2.5e-05, "loss": 3.1777, "norm_diff": 0.0528, "norm_loss": 0.0, "num_token_doc": 66.8004, "num_token_overlap": 15.7976, "num_token_query": 42.3849, "num_token_union": 68.5622, "num_word_context": 202.3845, "num_word_doc": 49.8518, "num_word_query": 32.0204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6319.5979, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3108, "query_norm": 1.4004, "queue_k_norm": 1.4537, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3849, "sent_len_1": 66.8004, "sent_len_max_0": 127.985, "sent_len_max_1": 189.985, "stdk": 0.0483, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1539, "doc_norm": 1.457, "encoder_q-embeddings": 2515.1201, "encoder_q-layer.0": 1603.4973, "encoder_q-layer.1": 1954.5393, "encoder_q-layer.10": 2419.3296, "encoder_q-layer.11": 5337.1323, "encoder_q-layer.2": 2209.2561, "encoder_q-layer.3": 2243.5996, "encoder_q-layer.4": 2388.269, "encoder_q-layer.5": 2408.9016, "encoder_q-layer.6": 2548.582, "encoder_q-layer.7": 2727.6584, "encoder_q-layer.8": 3126.1599, "encoder_q-layer.9": 2584.2744, "epoch": 0.54, "inbatch_neg_score": 0.3116, "inbatch_pos_score": 1.0176, "learning_rate": 2.4944444444444447e-05, "loss": 3.1539, "norm_diff": 0.0452, "norm_loss": 0.0, "num_token_doc": 66.6322, "num_token_overlap": 15.8374, "num_token_query": 42.3094, "num_token_union": 68.3603, "num_word_context": 202.3068, "num_word_doc": 49.7113, "num_word_query": 31.9677, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4109.9628, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3118, "query_norm": 1.4117, "queue_k_norm": 1.452, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3094, "sent_len_1": 66.6322, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1875, "stdk": 0.0484, "stdq": 0.0459, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.1714, "doc_norm": 1.4483, "encoder_q-embeddings": 1998.6349, "encoder_q-layer.0": 1285.4354, "encoder_q-layer.1": 1361.554, "encoder_q-layer.10": 2415.5291, "encoder_q-layer.11": 5546.5479, "encoder_q-layer.2": 1526.4424, "encoder_q-layer.3": 1565.0789, "encoder_q-layer.4": 1619.8608, "encoder_q-layer.5": 1732.3457, "encoder_q-layer.6": 1962.144, "encoder_q-layer.7": 2184.5188, "encoder_q-layer.8": 2540.3079, "encoder_q-layer.9": 2316.7148, "epoch": 0.54, "inbatch_neg_score": 0.3057, "inbatch_pos_score": 0.9619, "learning_rate": 2.488888888888889e-05, "loss": 3.1714, "norm_diff": 0.0641, "norm_loss": 0.0, "num_token_doc": 66.965, "num_token_overlap": 15.8521, "num_token_query": 42.2708, "num_token_union": 68.5159, "num_word_context": 202.3276, "num_word_doc": 49.9941, "num_word_query": 31.9118, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3612.5917, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.304, "query_norm": 1.3842, "queue_k_norm": 1.4542, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2708, "sent_len_1": 66.965, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.2862, "stdk": 0.0481, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1481, "doc_norm": 1.4562, "encoder_q-embeddings": 3828.5588, "encoder_q-layer.0": 2684.2683, "encoder_q-layer.1": 3084.5935, "encoder_q-layer.10": 2367.0979, "encoder_q-layer.11": 5401.5415, "encoder_q-layer.2": 3615.3289, "encoder_q-layer.3": 3508.1033, "encoder_q-layer.4": 3184.8711, "encoder_q-layer.5": 3210.5396, "encoder_q-layer.6": 3081.7307, "encoder_q-layer.7": 3179.3123, "encoder_q-layer.8": 3310.5759, "encoder_q-layer.9": 2498.5583, "epoch": 0.54, "inbatch_neg_score": 0.3072, "inbatch_pos_score": 0.9824, "learning_rate": 2.4833333333333335e-05, "loss": 3.1481, "norm_diff": 0.0585, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 15.8842, "num_token_query": 42.4521, "num_token_union": 68.592, "num_word_context": 202.1414, "num_word_doc": 49.8977, "num_word_query": 32.0622, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5107.5033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3086, "query_norm": 1.3977, "queue_k_norm": 1.4545, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4521, "sent_len_1": 66.883, "sent_len_max_0": 128.0, "sent_len_max_1": 189.77, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.1607, "doc_norm": 1.457, "encoder_q-embeddings": 7606.3916, "encoder_q-layer.0": 5482.125, "encoder_q-layer.1": 5182.376, "encoder_q-layer.10": 2574.0164, "encoder_q-layer.11": 5619.832, "encoder_q-layer.2": 5201.7285, "encoder_q-layer.3": 4247.7231, "encoder_q-layer.4": 4330.0967, "encoder_q-layer.5": 3649.2744, "encoder_q-layer.6": 3046.2642, "encoder_q-layer.7": 3226.3164, "encoder_q-layer.8": 3488.8848, "encoder_q-layer.9": 2851.1492, "epoch": 0.54, "inbatch_neg_score": 0.3139, "inbatch_pos_score": 1.0068, "learning_rate": 2.477777777777778e-05, "loss": 3.1607, "norm_diff": 0.0297, "norm_loss": 0.0, "num_token_doc": 66.8381, "num_token_overlap": 15.8925, "num_token_query": 42.4336, "num_token_union": 68.4963, "num_word_context": 202.2739, "num_word_doc": 49.9146, "num_word_query": 32.0778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7124.2244, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3145, "query_norm": 1.4273, "queue_k_norm": 1.4519, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4336, "sent_len_1": 66.8381, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1138, "stdk": 0.0485, "stdq": 0.0464, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1391, "doc_norm": 1.4522, "encoder_q-embeddings": 2506.5122, "encoder_q-layer.0": 1628.515, "encoder_q-layer.1": 1727.9435, "encoder_q-layer.10": 2721.3296, "encoder_q-layer.11": 6030.3809, "encoder_q-layer.2": 1988.673, "encoder_q-layer.3": 2030.0406, "encoder_q-layer.4": 2122.7422, "encoder_q-layer.5": 2291.1216, "encoder_q-layer.6": 2419.9521, "encoder_q-layer.7": 2813.7681, "encoder_q-layer.8": 2955.7356, "encoder_q-layer.9": 2602.1787, "epoch": 0.54, "inbatch_neg_score": 0.3115, "inbatch_pos_score": 0.9854, "learning_rate": 2.4722222222222223e-05, "loss": 3.1391, "norm_diff": 0.0641, "norm_loss": 0.0, "num_token_doc": 66.6738, "num_token_overlap": 15.8681, "num_token_query": 42.3656, "num_token_union": 68.3959, "num_word_context": 202.0403, "num_word_doc": 49.768, "num_word_query": 32.0068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4157.7794, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3123, "query_norm": 1.3881, "queue_k_norm": 1.4524, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3656, "sent_len_1": 66.6738, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4212, "stdk": 0.0483, "stdq": 0.0447, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.1838, "doc_norm": 1.451, "encoder_q-embeddings": 1406.48, "encoder_q-layer.0": 943.39, "encoder_q-layer.1": 1013.9041, "encoder_q-layer.10": 1223.0442, "encoder_q-layer.11": 2782.4802, "encoder_q-layer.2": 1136.7999, "encoder_q-layer.3": 1065.3756, "encoder_q-layer.4": 1049.3813, "encoder_q-layer.5": 1031.9301, "encoder_q-layer.6": 1021.8312, "encoder_q-layer.7": 1161.1357, "encoder_q-layer.8": 1313.8362, "encoder_q-layer.9": 1222.0822, "epoch": 0.54, "inbatch_neg_score": 0.3113, "inbatch_pos_score": 0.9614, "learning_rate": 2.466666666666667e-05, "loss": 3.1838, "norm_diff": 0.0712, "norm_loss": 0.0, "num_token_doc": 66.6003, "num_token_overlap": 15.7933, "num_token_query": 42.291, "num_token_union": 68.3538, "num_word_context": 202.0012, "num_word_doc": 49.693, "num_word_query": 31.9355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2037.2549, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3113, "query_norm": 1.3798, "queue_k_norm": 1.4528, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.291, "sent_len_1": 66.6003, "sent_len_max_0": 127.9663, "sent_len_max_1": 188.6975, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.1775, "doc_norm": 1.4535, "encoder_q-embeddings": 1006.542, "encoder_q-layer.0": 676.5603, "encoder_q-layer.1": 706.3033, "encoder_q-layer.10": 1293.9917, "encoder_q-layer.11": 2890.3706, "encoder_q-layer.2": 792.4155, "encoder_q-layer.3": 855.1965, "encoder_q-layer.4": 924.67, "encoder_q-layer.5": 979.08, "encoder_q-layer.6": 1133.1766, "encoder_q-layer.7": 1293.2349, "encoder_q-layer.8": 1423.0006, "encoder_q-layer.9": 1227.4036, "epoch": 0.54, "inbatch_neg_score": 0.3113, "inbatch_pos_score": 0.9824, "learning_rate": 2.461111111111111e-05, "loss": 3.1775, "norm_diff": 0.0685, "norm_loss": 0.0, "num_token_doc": 66.6528, "num_token_overlap": 15.7933, "num_token_query": 42.3865, "num_token_union": 68.4631, "num_word_context": 202.3313, "num_word_doc": 49.7686, "num_word_query": 32.0149, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1929.9619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3115, "query_norm": 1.385, "queue_k_norm": 1.4522, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3865, "sent_len_1": 66.6528, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5037, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.177, "doc_norm": 1.4589, "encoder_q-embeddings": 1298.4388, "encoder_q-layer.0": 903.6609, "encoder_q-layer.1": 1035.1637, "encoder_q-layer.10": 1249.9781, "encoder_q-layer.11": 2805.0527, "encoder_q-layer.2": 1234.4806, "encoder_q-layer.3": 1297.7921, "encoder_q-layer.4": 1371.5977, "encoder_q-layer.5": 1511.0729, "encoder_q-layer.6": 1621.572, "encoder_q-layer.7": 1670.2554, "encoder_q-layer.8": 2092.252, "encoder_q-layer.9": 1546.7385, "epoch": 0.54, "inbatch_neg_score": 0.3149, "inbatch_pos_score": 0.9863, "learning_rate": 2.4555555555555557e-05, "loss": 3.177, "norm_diff": 0.0531, "norm_loss": 0.0, "num_token_doc": 66.8277, "num_token_overlap": 15.8544, "num_token_query": 42.2224, "num_token_union": 68.3894, "num_word_context": 202.1621, "num_word_doc": 49.8345, "num_word_query": 31.8798, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2320.5767, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3152, "query_norm": 1.4059, "queue_k_norm": 1.4545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2224, "sent_len_1": 66.8277, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.6062, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1558, "doc_norm": 1.4568, "encoder_q-embeddings": 1093.9977, "encoder_q-layer.0": 695.6987, "encoder_q-layer.1": 719.2173, "encoder_q-layer.10": 1204.6108, "encoder_q-layer.11": 2830.6316, "encoder_q-layer.2": 787.7825, "encoder_q-layer.3": 843.5927, "encoder_q-layer.4": 899.0082, "encoder_q-layer.5": 947.8356, "encoder_q-layer.6": 1080.2979, "encoder_q-layer.7": 1264.2628, "encoder_q-layer.8": 1402.1731, "encoder_q-layer.9": 1233.7646, "epoch": 0.55, "inbatch_neg_score": 0.3188, "inbatch_pos_score": 0.98, "learning_rate": 2.45e-05, "loss": 3.1558, "norm_diff": 0.0491, "norm_loss": 0.0, "num_token_doc": 66.8349, "num_token_overlap": 15.8133, "num_token_query": 42.29, "num_token_union": 68.4894, "num_word_context": 202.1586, "num_word_doc": 49.8251, "num_word_query": 31.9622, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1895.8682, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3167, "query_norm": 1.4077, "queue_k_norm": 1.4533, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.29, "sent_len_1": 66.8349, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.2713, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1797, "doc_norm": 1.4496, "encoder_q-embeddings": 969.8577, "encoder_q-layer.0": 636.7833, "encoder_q-layer.1": 667.7646, "encoder_q-layer.10": 1155.42, "encoder_q-layer.11": 2611.6895, "encoder_q-layer.2": 737.2806, "encoder_q-layer.3": 779.1312, "encoder_q-layer.4": 818.8378, "encoder_q-layer.5": 841.4004, "encoder_q-layer.6": 962.7994, "encoder_q-layer.7": 1097.9779, "encoder_q-layer.8": 1268.5933, "encoder_q-layer.9": 1166.2714, "epoch": 0.55, "inbatch_neg_score": 0.3252, "inbatch_pos_score": 0.9951, "learning_rate": 2.4444444444444445e-05, "loss": 3.1797, "norm_diff": 0.0378, "norm_loss": 0.0, "num_token_doc": 66.7676, "num_token_overlap": 15.79, "num_token_query": 42.2054, "num_token_union": 68.4225, "num_word_context": 202.4672, "num_word_doc": 49.7553, "num_word_query": 31.8456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1735.0835, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3252, "query_norm": 1.4118, "queue_k_norm": 1.4533, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2054, "sent_len_1": 66.7676, "sent_len_max_0": 128.0, "sent_len_max_1": 190.15, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1782, "doc_norm": 1.4559, "encoder_q-embeddings": 1734.8278, "encoder_q-layer.0": 1149.0001, "encoder_q-layer.1": 1307.9641, "encoder_q-layer.10": 1161.9148, "encoder_q-layer.11": 2719.8723, "encoder_q-layer.2": 1502.9331, "encoder_q-layer.3": 1636.7739, "encoder_q-layer.4": 1711.3237, "encoder_q-layer.5": 1536.4178, "encoder_q-layer.6": 1574.6379, "encoder_q-layer.7": 1493.3547, "encoder_q-layer.8": 1665.5444, "encoder_q-layer.9": 1363.1829, "epoch": 0.55, "inbatch_neg_score": 0.3232, "inbatch_pos_score": 1.0088, "learning_rate": 2.4388888888888887e-05, "loss": 3.1782, "norm_diff": 0.0388, "norm_loss": 0.0, "num_token_doc": 66.8038, "num_token_overlap": 15.7496, "num_token_query": 42.1795, "num_token_union": 68.5079, "num_word_context": 202.1854, "num_word_doc": 49.8778, "num_word_query": 31.8535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2444.1141, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3228, "query_norm": 1.4171, "queue_k_norm": 1.453, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1795, "sent_len_1": 66.8038, "sent_len_max_0": 127.99, "sent_len_max_1": 189.3212, "stdk": 0.0484, "stdq": 0.0455, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 51.2695, "active_queue_size": 16384.0, "cl_loss": 3.1544, "doc_norm": 1.4517, "encoder_q-embeddings": 1176.7533, "encoder_q-layer.0": 783.2628, "encoder_q-layer.1": 848.6365, "encoder_q-layer.10": 1212.7084, "encoder_q-layer.11": 2851.3513, "encoder_q-layer.2": 925.2191, "encoder_q-layer.3": 980.3622, "encoder_q-layer.4": 1109.4805, "encoder_q-layer.5": 1136.5358, "encoder_q-layer.6": 1239.5724, "encoder_q-layer.7": 1307.5114, "encoder_q-layer.8": 1517.9264, "encoder_q-layer.9": 1321.4513, "epoch": 0.55, "inbatch_neg_score": 0.3242, "inbatch_pos_score": 0.9702, "learning_rate": 2.4333333333333336e-05, "loss": 3.1544, "norm_diff": 0.0664, "norm_loss": 0.0, "num_token_doc": 66.8728, "num_token_overlap": 15.8173, "num_token_query": 42.2524, "num_token_union": 68.5214, "num_word_context": 202.6724, "num_word_doc": 49.9327, "num_word_query": 31.9279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.2226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3247, "query_norm": 1.3852, "queue_k_norm": 1.4537, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2524, "sent_len_1": 66.8728, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.4725, "stdk": 0.0482, "stdq": 0.0442, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.141, "doc_norm": 1.4463, "encoder_q-embeddings": 1217.4469, "encoder_q-layer.0": 808.7928, "encoder_q-layer.1": 866.8376, "encoder_q-layer.10": 1160.2129, "encoder_q-layer.11": 2597.0391, "encoder_q-layer.2": 1001.0422, "encoder_q-layer.3": 1057.4152, "encoder_q-layer.4": 1106.8291, "encoder_q-layer.5": 1111.3204, "encoder_q-layer.6": 1188.9413, "encoder_q-layer.7": 1198.7606, "encoder_q-layer.8": 1305.1178, "encoder_q-layer.9": 1114.0006, "epoch": 0.55, "inbatch_neg_score": 0.3286, "inbatch_pos_score": 1.0039, "learning_rate": 2.427777777777778e-05, "loss": 3.141, "norm_diff": 0.0397, "norm_loss": 0.0, "num_token_doc": 66.8103, "num_token_overlap": 15.8808, "num_token_query": 42.4956, "num_token_union": 68.5266, "num_word_context": 202.5888, "num_word_doc": 49.8573, "num_word_query": 32.0667, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1920.0492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3276, "query_norm": 1.4065, "queue_k_norm": 1.4556, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4956, "sent_len_1": 66.8103, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.8313, "stdk": 0.048, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1447, "doc_norm": 1.4577, "encoder_q-embeddings": 1054.879, "encoder_q-layer.0": 692.8329, "encoder_q-layer.1": 738.6816, "encoder_q-layer.10": 1187.163, "encoder_q-layer.11": 2771.718, "encoder_q-layer.2": 827.0835, "encoder_q-layer.3": 856.847, "encoder_q-layer.4": 893.1127, "encoder_q-layer.5": 921.3588, "encoder_q-layer.6": 1010.27, "encoder_q-layer.7": 1164.3103, "encoder_q-layer.8": 1275.0609, "encoder_q-layer.9": 1162.1152, "epoch": 0.55, "inbatch_neg_score": 0.3288, "inbatch_pos_score": 1.0195, "learning_rate": 2.4222222222222224e-05, "loss": 3.1447, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 67.0081, "num_token_overlap": 15.8713, "num_token_query": 42.6088, "num_token_union": 68.7387, "num_word_context": 202.6207, "num_word_doc": 49.977, "num_word_query": 32.1917, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1859.045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3296, "query_norm": 1.4065, "queue_k_norm": 1.4545, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.6088, "sent_len_1": 67.0081, "sent_len_max_0": 128.0, "sent_len_max_1": 189.375, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1497, "doc_norm": 1.4548, "encoder_q-embeddings": 1198.8719, "encoder_q-layer.0": 769.1196, "encoder_q-layer.1": 865.1915, "encoder_q-layer.10": 1161.2841, "encoder_q-layer.11": 2754.7998, "encoder_q-layer.2": 991.2438, "encoder_q-layer.3": 1062.5618, "encoder_q-layer.4": 1089.9226, "encoder_q-layer.5": 1059.0135, "encoder_q-layer.6": 1124.9061, "encoder_q-layer.7": 1274.5776, "encoder_q-layer.8": 1270.9298, "encoder_q-layer.9": 1164.7667, "epoch": 0.55, "inbatch_neg_score": 0.3273, "inbatch_pos_score": 1.0107, "learning_rate": 2.4166666666666667e-05, "loss": 3.1497, "norm_diff": 0.0483, "norm_loss": 0.0, "num_token_doc": 66.7709, "num_token_overlap": 15.8181, "num_token_query": 42.3294, "num_token_union": 68.4938, "num_word_context": 202.2096, "num_word_doc": 49.8048, "num_word_query": 31.9817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1945.1824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3269, "query_norm": 1.4065, "queue_k_norm": 1.4563, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3294, "sent_len_1": 66.7709, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4187, "stdk": 0.0483, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 3.1363, "doc_norm": 1.456, "encoder_q-embeddings": 2069.8352, "encoder_q-layer.0": 1338.6714, "encoder_q-layer.1": 1514.6655, "encoder_q-layer.10": 1120.8423, "encoder_q-layer.11": 2646.0876, "encoder_q-layer.2": 1878.4492, "encoder_q-layer.3": 2096.1555, "encoder_q-layer.4": 2225.1682, "encoder_q-layer.5": 2148.9941, "encoder_q-layer.6": 2158.6042, "encoder_q-layer.7": 1627.5491, "encoder_q-layer.8": 1333.918, "encoder_q-layer.9": 1145.5525, "epoch": 0.55, "inbatch_neg_score": 0.3232, "inbatch_pos_score": 1.0244, "learning_rate": 2.4111111111111113e-05, "loss": 3.1363, "norm_diff": 0.0569, "norm_loss": 0.0, "num_token_doc": 66.9371, "num_token_overlap": 15.8926, "num_token_query": 42.5059, "num_token_union": 68.6733, "num_word_context": 202.886, "num_word_doc": 49.9673, "num_word_query": 32.1397, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2757.7505, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3242, "query_norm": 1.3992, "queue_k_norm": 1.4578, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5059, "sent_len_1": 66.9371, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4688, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.1353, "doc_norm": 1.4577, "encoder_q-embeddings": 1177.6371, "encoder_q-layer.0": 774.832, "encoder_q-layer.1": 847.9894, "encoder_q-layer.10": 1208.3109, "encoder_q-layer.11": 2668.8828, "encoder_q-layer.2": 945.5792, "encoder_q-layer.3": 1023.7709, "encoder_q-layer.4": 1121.2074, "encoder_q-layer.5": 1133.7751, "encoder_q-layer.6": 1216.1748, "encoder_q-layer.7": 1306.6176, "encoder_q-layer.8": 1424.0179, "encoder_q-layer.9": 1200.7533, "epoch": 0.55, "inbatch_neg_score": 0.3265, "inbatch_pos_score": 0.9829, "learning_rate": 2.4055555555555555e-05, "loss": 3.1353, "norm_diff": 0.0628, "norm_loss": 0.0, "num_token_doc": 66.8376, "num_token_overlap": 15.9114, "num_token_query": 42.4572, "num_token_union": 68.5036, "num_word_context": 202.2727, "num_word_doc": 49.85, "num_word_query": 32.0744, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1964.33, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3257, "query_norm": 1.3949, "queue_k_norm": 1.4574, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4572, "sent_len_1": 66.8376, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5675, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1437, "doc_norm": 1.4579, "encoder_q-embeddings": 1115.415, "encoder_q-layer.0": 741.9664, "encoder_q-layer.1": 808.8511, "encoder_q-layer.10": 1219.5415, "encoder_q-layer.11": 2800.146, "encoder_q-layer.2": 890.2505, "encoder_q-layer.3": 934.5461, "encoder_q-layer.4": 995.4668, "encoder_q-layer.5": 966.3051, "encoder_q-layer.6": 1038.0743, "encoder_q-layer.7": 1150.6893, "encoder_q-layer.8": 1284.7925, "encoder_q-layer.9": 1149.2115, "epoch": 0.55, "inbatch_neg_score": 0.3302, "inbatch_pos_score": 0.9927, "learning_rate": 2.4e-05, "loss": 3.1437, "norm_diff": 0.064, "norm_loss": 0.0, "num_token_doc": 66.8143, "num_token_overlap": 15.8946, "num_token_query": 42.4339, "num_token_union": 68.5102, "num_word_context": 202.4493, "num_word_doc": 49.9231, "num_word_query": 32.0718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1887.2501, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3296, "query_norm": 1.3939, "queue_k_norm": 1.455, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4339, "sent_len_1": 66.8143, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.265, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.1568, "doc_norm": 1.4561, "encoder_q-embeddings": 1406.8436, "encoder_q-layer.0": 916.4083, "encoder_q-layer.1": 982.8682, "encoder_q-layer.10": 1240.6903, "encoder_q-layer.11": 2993.2549, "encoder_q-layer.2": 1208.5709, "encoder_q-layer.3": 1311.1904, "encoder_q-layer.4": 1351.0511, "encoder_q-layer.5": 1417.783, "encoder_q-layer.6": 1569.4954, "encoder_q-layer.7": 1508.3314, "encoder_q-layer.8": 1488.2694, "encoder_q-layer.9": 1311.0883, "epoch": 0.56, "inbatch_neg_score": 0.3224, "inbatch_pos_score": 0.98, "learning_rate": 2.3944444444444443e-05, "loss": 3.1568, "norm_diff": 0.0737, "norm_loss": 0.0, "num_token_doc": 66.7635, "num_token_overlap": 15.8489, "num_token_query": 42.5196, "num_token_union": 68.5905, "num_word_context": 202.4247, "num_word_doc": 49.7783, "num_word_query": 32.1315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2245.216, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3215, "query_norm": 1.3825, "queue_k_norm": 1.4576, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5196, "sent_len_1": 66.7635, "sent_len_max_0": 128.0, "sent_len_max_1": 191.64, "stdk": 0.0484, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1719, "doc_norm": 1.4564, "encoder_q-embeddings": 1858.1085, "encoder_q-layer.0": 1278.2434, "encoder_q-layer.1": 1438.328, "encoder_q-layer.10": 1179.9587, "encoder_q-layer.11": 2802.9517, "encoder_q-layer.2": 1730.9922, "encoder_q-layer.3": 1953.8734, "encoder_q-layer.4": 2042.3239, "encoder_q-layer.5": 2126.4482, "encoder_q-layer.6": 2118.7239, "encoder_q-layer.7": 2098.8276, "encoder_q-layer.8": 1548.8844, "encoder_q-layer.9": 1258.7272, "epoch": 0.56, "inbatch_neg_score": 0.3164, "inbatch_pos_score": 0.9761, "learning_rate": 2.3888888888888892e-05, "loss": 3.1719, "norm_diff": 0.0929, "norm_loss": 0.0, "num_token_doc": 66.7177, "num_token_overlap": 15.7451, "num_token_query": 42.3312, "num_token_union": 68.4912, "num_word_context": 202.6424, "num_word_doc": 49.7655, "num_word_query": 31.9701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2773.984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3159, "query_norm": 1.3636, "queue_k_norm": 1.4591, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3312, "sent_len_1": 66.7177, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0563, "stdk": 0.0483, "stdq": 0.0441, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.1589, "doc_norm": 1.454, "encoder_q-embeddings": 1210.3951, "encoder_q-layer.0": 841.9871, "encoder_q-layer.1": 884.0165, "encoder_q-layer.10": 1163.6012, "encoder_q-layer.11": 2631.0393, "encoder_q-layer.2": 915.7745, "encoder_q-layer.3": 988.9265, "encoder_q-layer.4": 1006.4873, "encoder_q-layer.5": 1067.0947, "encoder_q-layer.6": 1149.4056, "encoder_q-layer.7": 1262.8927, "encoder_q-layer.8": 1379.7808, "encoder_q-layer.9": 1197.7437, "epoch": 0.56, "inbatch_neg_score": 0.3125, "inbatch_pos_score": 0.9712, "learning_rate": 2.3833333333333334e-05, "loss": 3.1589, "norm_diff": 0.0761, "norm_loss": 0.0, "num_token_doc": 66.9613, "num_token_overlap": 15.7878, "num_token_query": 42.2121, "num_token_union": 68.5543, "num_word_context": 202.4032, "num_word_doc": 49.9719, "num_word_query": 31.873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1921.9633, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.312, "query_norm": 1.3779, "queue_k_norm": 1.4566, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2121, "sent_len_1": 66.9613, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.3413, "stdk": 0.0483, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.1513, "doc_norm": 1.451, "encoder_q-embeddings": 2569.5994, "encoder_q-layer.0": 1679.8788, "encoder_q-layer.1": 2064.0112, "encoder_q-layer.10": 1321.4443, "encoder_q-layer.11": 2897.728, "encoder_q-layer.2": 2488.5872, "encoder_q-layer.3": 2572.541, "encoder_q-layer.4": 2964.905, "encoder_q-layer.5": 3006.9749, "encoder_q-layer.6": 3315.5024, "encoder_q-layer.7": 3097.6067, "encoder_q-layer.8": 2169.5769, "encoder_q-layer.9": 1386.2533, "epoch": 0.56, "inbatch_neg_score": 0.3157, "inbatch_pos_score": 0.9829, "learning_rate": 2.377777777777778e-05, "loss": 3.1513, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.6778, "num_token_overlap": 15.8219, "num_token_query": 42.3175, "num_token_union": 68.3999, "num_word_context": 202.1814, "num_word_doc": 49.7887, "num_word_query": 31.98, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3703.1553, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3157, "query_norm": 1.3866, "queue_k_norm": 1.4575, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3175, "sent_len_1": 66.6778, "sent_len_max_0": 127.995, "sent_len_max_1": 189.23, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.149, "doc_norm": 1.4625, "encoder_q-embeddings": 1663.171, "encoder_q-layer.0": 1106.7908, "encoder_q-layer.1": 1198.1449, "encoder_q-layer.10": 572.5175, "encoder_q-layer.11": 1366.875, "encoder_q-layer.2": 1359.6721, "encoder_q-layer.3": 1459.6659, "encoder_q-layer.4": 1629.0344, "encoder_q-layer.5": 1636.3236, "encoder_q-layer.6": 1621.8339, "encoder_q-layer.7": 1531.9805, "encoder_q-layer.8": 1138.9498, "encoder_q-layer.9": 724.8253, "epoch": 0.56, "inbatch_neg_score": 0.3227, "inbatch_pos_score": 1.0068, "learning_rate": 2.3722222222222222e-05, "loss": 3.149, "norm_diff": 0.056, "norm_loss": 0.0, "num_token_doc": 66.7509, "num_token_overlap": 15.8876, "num_token_query": 42.4511, "num_token_union": 68.4843, "num_word_context": 202.5056, "num_word_doc": 49.8343, "num_word_query": 32.0834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2056.603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.323, "query_norm": 1.4065, "queue_k_norm": 1.4578, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4511, "sent_len_1": 66.7509, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0425, "stdk": 0.0486, "stdq": 0.0454, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.1559, "doc_norm": 1.451, "encoder_q-embeddings": 590.3681, "encoder_q-layer.0": 426.1135, "encoder_q-layer.1": 456.7361, "encoder_q-layer.10": 625.8138, "encoder_q-layer.11": 1441.5677, "encoder_q-layer.2": 507.0772, "encoder_q-layer.3": 504.7627, "encoder_q-layer.4": 513.2646, "encoder_q-layer.5": 528.2021, "encoder_q-layer.6": 561.4189, "encoder_q-layer.7": 615.8047, "encoder_q-layer.8": 698.5081, "encoder_q-layer.9": 604.3873, "epoch": 0.56, "inbatch_neg_score": 0.3245, "inbatch_pos_score": 1.0, "learning_rate": 2.3666666666666668e-05, "loss": 3.1559, "norm_diff": 0.0545, "norm_loss": 0.0, "num_token_doc": 66.793, "num_token_overlap": 15.7945, "num_token_query": 42.1621, "num_token_union": 68.4263, "num_word_context": 201.9791, "num_word_doc": 49.83, "num_word_query": 31.8479, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1007.1935, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.324, "query_norm": 1.3965, "queue_k_norm": 1.4582, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1621, "sent_len_1": 66.793, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4888, "stdk": 0.0482, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.1312, "doc_norm": 1.4551, "encoder_q-embeddings": 805.5651, "encoder_q-layer.0": 587.551, "encoder_q-layer.1": 640.327, "encoder_q-layer.10": 546.4975, "encoder_q-layer.11": 1335.1, "encoder_q-layer.2": 705.3387, "encoder_q-layer.3": 701.7272, "encoder_q-layer.4": 709.5637, "encoder_q-layer.5": 685.9817, "encoder_q-layer.6": 641.7305, "encoder_q-layer.7": 668.5973, "encoder_q-layer.8": 685.3784, "encoder_q-layer.9": 578.6896, "epoch": 0.56, "inbatch_neg_score": 0.3258, "inbatch_pos_score": 0.9902, "learning_rate": 2.361111111111111e-05, "loss": 3.1312, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.819, "num_token_overlap": 15.934, "num_token_query": 42.5256, "num_token_union": 68.4993, "num_word_context": 202.1438, "num_word_doc": 49.8412, "num_word_query": 32.1537, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1117.0979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3252, "query_norm": 1.3967, "queue_k_norm": 1.4567, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5256, "sent_len_1": 66.819, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.4825, "stdk": 0.0483, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 52.0508, "active_queue_size": 16384.0, "cl_loss": 3.1544, "doc_norm": 1.4602, "encoder_q-embeddings": 2077.1663, "encoder_q-layer.0": 1604.9249, "encoder_q-layer.1": 1873.072, "encoder_q-layer.10": 630.2502, "encoder_q-layer.11": 1377.7131, "encoder_q-layer.2": 2236.9468, "encoder_q-layer.3": 2229.3545, "encoder_q-layer.4": 2427.8081, "encoder_q-layer.5": 2361.3665, "encoder_q-layer.6": 2047.1687, "encoder_q-layer.7": 1566.3938, "encoder_q-layer.8": 887.1074, "encoder_q-layer.9": 609.8148, "epoch": 0.56, "inbatch_neg_score": 0.3283, "inbatch_pos_score": 0.9849, "learning_rate": 2.3555555555555556e-05, "loss": 3.1544, "norm_diff": 0.0665, "norm_loss": 0.0, "num_token_doc": 66.8493, "num_token_overlap": 15.8269, "num_token_query": 42.3007, "num_token_union": 68.524, "num_word_context": 202.4975, "num_word_doc": 49.8992, "num_word_query": 31.987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2688.3537, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3267, "query_norm": 1.3937, "queue_k_norm": 1.4584, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3007, "sent_len_1": 66.8493, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.1413, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1445, "doc_norm": 1.4549, "encoder_q-embeddings": 1518.3156, "encoder_q-layer.0": 1018.0814, "encoder_q-layer.1": 1174.1844, "encoder_q-layer.10": 579.0031, "encoder_q-layer.11": 1389.2778, "encoder_q-layer.2": 1348.2418, "encoder_q-layer.3": 1326.7985, "encoder_q-layer.4": 1364.1508, "encoder_q-layer.5": 1498.9338, "encoder_q-layer.6": 1721.835, "encoder_q-layer.7": 1553.668, "encoder_q-layer.8": 1407.5052, "encoder_q-layer.9": 974.1129, "epoch": 0.56, "inbatch_neg_score": 0.3323, "inbatch_pos_score": 0.9858, "learning_rate": 2.35e-05, "loss": 3.1445, "norm_diff": 0.0693, "norm_loss": 0.0, "num_token_doc": 66.8184, "num_token_overlap": 15.8212, "num_token_query": 42.3627, "num_token_union": 68.5283, "num_word_context": 202.212, "num_word_doc": 49.862, "num_word_query": 31.9867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2040.9396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3325, "query_norm": 1.3857, "queue_k_norm": 1.4591, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3627, "sent_len_1": 66.8184, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6062, "stdk": 0.0483, "stdq": 0.0443, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.1464, "doc_norm": 1.4591, "encoder_q-embeddings": 612.0481, "encoder_q-layer.0": 415.8889, "encoder_q-layer.1": 432.9494, "encoder_q-layer.10": 582.1309, "encoder_q-layer.11": 1385.3835, "encoder_q-layer.2": 501.7093, "encoder_q-layer.3": 517.9954, "encoder_q-layer.4": 535.2453, "encoder_q-layer.5": 539.9874, "encoder_q-layer.6": 552.1548, "encoder_q-layer.7": 605.2854, "encoder_q-layer.8": 691.9678, "encoder_q-layer.9": 596.4813, "epoch": 0.56, "inbatch_neg_score": 0.3323, "inbatch_pos_score": 0.9995, "learning_rate": 2.3444444444444448e-05, "loss": 3.1464, "norm_diff": 0.0604, "norm_loss": 0.0, "num_token_doc": 66.7313, "num_token_overlap": 15.8796, "num_token_query": 42.4461, "num_token_union": 68.4879, "num_word_context": 202.232, "num_word_doc": 49.7608, "num_word_query": 32.0568, "postclip_grad_norm": 1.0, "preclip_grad_norm": 992.9683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3323, "query_norm": 1.3986, "queue_k_norm": 1.4581, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4461, "sent_len_1": 66.7313, "sent_len_max_0": 127.99, "sent_len_max_1": 190.7138, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1547, "doc_norm": 1.4614, "encoder_q-embeddings": 477.4126, "encoder_q-layer.0": 316.8387, "encoder_q-layer.1": 339.8314, "encoder_q-layer.10": 622.4665, "encoder_q-layer.11": 1354.4772, "encoder_q-layer.2": 389.366, "encoder_q-layer.3": 408.0785, "encoder_q-layer.4": 417.8555, "encoder_q-layer.5": 420.9653, "encoder_q-layer.6": 464.5931, "encoder_q-layer.7": 511.3841, "encoder_q-layer.8": 591.6462, "encoder_q-layer.9": 564.1065, "epoch": 0.57, "inbatch_neg_score": 0.3357, "inbatch_pos_score": 1.0352, "learning_rate": 2.338888888888889e-05, "loss": 3.1547, "norm_diff": 0.0555, "norm_loss": 0.0, "num_token_doc": 66.7857, "num_token_overlap": 15.8421, "num_token_query": 42.3572, "num_token_union": 68.504, "num_word_context": 202.5268, "num_word_doc": 49.8797, "num_word_query": 32.0005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 882.7472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3376, "query_norm": 1.4059, "queue_k_norm": 1.4592, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3572, "sent_len_1": 66.7857, "sent_len_max_0": 128.0, "sent_len_max_1": 187.2525, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.126, "doc_norm": 1.4602, "encoder_q-embeddings": 809.1403, "encoder_q-layer.0": 528.9564, "encoder_q-layer.1": 611.3262, "encoder_q-layer.10": 566.684, "encoder_q-layer.11": 1375.6525, "encoder_q-layer.2": 706.9012, "encoder_q-layer.3": 753.5681, "encoder_q-layer.4": 792.9849, "encoder_q-layer.5": 777.3209, "encoder_q-layer.6": 788.8793, "encoder_q-layer.7": 798.1442, "encoder_q-layer.8": 802.2925, "encoder_q-layer.9": 612.0905, "epoch": 0.57, "inbatch_neg_score": 0.3374, "inbatch_pos_score": 1.0078, "learning_rate": 2.3333333333333336e-05, "loss": 3.126, "norm_diff": 0.049, "norm_loss": 0.0, "num_token_doc": 66.6462, "num_token_overlap": 15.7929, "num_token_query": 42.354, "num_token_union": 68.4289, "num_word_context": 202.2756, "num_word_doc": 49.6909, "num_word_query": 31.976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1182.9295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3369, "query_norm": 1.4112, "queue_k_norm": 1.4592, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.354, "sent_len_1": 66.6462, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7637, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1405, "doc_norm": 1.465, "encoder_q-embeddings": 584.6628, "encoder_q-layer.0": 404.1961, "encoder_q-layer.1": 428.5309, "encoder_q-layer.10": 570.7385, "encoder_q-layer.11": 1384.4281, "encoder_q-layer.2": 472.0387, "encoder_q-layer.3": 504.7263, "encoder_q-layer.4": 541.7222, "encoder_q-layer.5": 552.8165, "encoder_q-layer.6": 554.3125, "encoder_q-layer.7": 564.9644, "encoder_q-layer.8": 640.5381, "encoder_q-layer.9": 574.5009, "epoch": 0.57, "inbatch_neg_score": 0.3412, "inbatch_pos_score": 1.0234, "learning_rate": 2.3277777777777778e-05, "loss": 3.1405, "norm_diff": 0.0508, "norm_loss": 0.0, "num_token_doc": 66.705, "num_token_overlap": 15.8569, "num_token_query": 42.3555, "num_token_union": 68.4045, "num_word_context": 202.3913, "num_word_doc": 49.7802, "num_word_query": 31.99, "postclip_grad_norm": 1.0, "preclip_grad_norm": 970.7903, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3403, "query_norm": 1.4143, "queue_k_norm": 1.4599, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3555, "sent_len_1": 66.705, "sent_len_max_0": 128.0, "sent_len_max_1": 189.33, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.1493, "doc_norm": 1.4561, "encoder_q-embeddings": 4553.6318, "encoder_q-layer.0": 3565.0356, "encoder_q-layer.1": 4552.0615, "encoder_q-layer.10": 592.6906, "encoder_q-layer.11": 1310.6469, "encoder_q-layer.2": 6430.0469, "encoder_q-layer.3": 6501.6494, "encoder_q-layer.4": 5685.3384, "encoder_q-layer.5": 5577.4614, "encoder_q-layer.6": 5378.1753, "encoder_q-layer.7": 4570.9976, "encoder_q-layer.8": 3336.0039, "encoder_q-layer.9": 1475.7726, "epoch": 0.57, "inbatch_neg_score": 0.3386, "inbatch_pos_score": 1.0049, "learning_rate": 2.3222222222222224e-05, "loss": 3.1493, "norm_diff": 0.042, "norm_loss": 0.0, "num_token_doc": 66.5337, "num_token_overlap": 15.7574, "num_token_query": 42.2808, "num_token_union": 68.3826, "num_word_context": 201.6953, "num_word_doc": 49.6463, "num_word_query": 31.9231, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6845.1355, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3372, "query_norm": 1.4177, "queue_k_norm": 1.4593, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2808, "sent_len_1": 66.5337, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1025, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.1404, "doc_norm": 1.4632, "encoder_q-embeddings": 509.6785, "encoder_q-layer.0": 337.3292, "encoder_q-layer.1": 359.9294, "encoder_q-layer.10": 594.2363, "encoder_q-layer.11": 1327.0049, "encoder_q-layer.2": 418.7092, "encoder_q-layer.3": 429.0367, "encoder_q-layer.4": 448.9041, "encoder_q-layer.5": 448.0143, "encoder_q-layer.6": 496.1827, "encoder_q-layer.7": 540.7308, "encoder_q-layer.8": 634.8951, "encoder_q-layer.9": 559.4061, "epoch": 0.57, "inbatch_neg_score": 0.3408, "inbatch_pos_score": 1.0283, "learning_rate": 2.3166666666666666e-05, "loss": 3.1404, "norm_diff": 0.0589, "norm_loss": 0.0, "num_token_doc": 66.7018, "num_token_overlap": 15.7753, "num_token_query": 42.228, "num_token_union": 68.4175, "num_word_context": 202.2612, "num_word_doc": 49.7386, "num_word_query": 31.8992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 901.1124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3408, "query_norm": 1.4044, "queue_k_norm": 1.4597, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.228, "sent_len_1": 66.7018, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5938, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1305, "doc_norm": 1.4582, "encoder_q-embeddings": 531.5103, "encoder_q-layer.0": 354.3051, "encoder_q-layer.1": 359.5742, "encoder_q-layer.10": 689.5461, "encoder_q-layer.11": 1407.9497, "encoder_q-layer.2": 396.8466, "encoder_q-layer.3": 415.0671, "encoder_q-layer.4": 442.2586, "encoder_q-layer.5": 465.4996, "encoder_q-layer.6": 510.8983, "encoder_q-layer.7": 542.5286, "encoder_q-layer.8": 627.0568, "encoder_q-layer.9": 575.338, "epoch": 0.57, "inbatch_neg_score": 0.3458, "inbatch_pos_score": 1.0234, "learning_rate": 2.3111111111111112e-05, "loss": 3.1305, "norm_diff": 0.0397, "norm_loss": 0.0, "num_token_doc": 66.7805, "num_token_overlap": 15.8646, "num_token_query": 42.3831, "num_token_union": 68.5036, "num_word_context": 201.9387, "num_word_doc": 49.878, "num_word_query": 32.0244, "postclip_grad_norm": 1.0, "preclip_grad_norm": 942.1061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.345, "query_norm": 1.4201, "queue_k_norm": 1.4611, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3831, "sent_len_1": 66.7805, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6825, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.1381, "doc_norm": 1.4588, "encoder_q-embeddings": 861.2103, "encoder_q-layer.0": 574.6963, "encoder_q-layer.1": 662.2338, "encoder_q-layer.10": 649.4717, "encoder_q-layer.11": 1432.611, "encoder_q-layer.2": 797.1735, "encoder_q-layer.3": 828.5173, "encoder_q-layer.4": 905.9393, "encoder_q-layer.5": 912.4127, "encoder_q-layer.6": 852.7956, "encoder_q-layer.7": 789.6485, "encoder_q-layer.8": 777.7265, "encoder_q-layer.9": 624.6783, "epoch": 0.57, "inbatch_neg_score": 0.3478, "inbatch_pos_score": 0.9766, "learning_rate": 2.3055555555555558e-05, "loss": 3.1381, "norm_diff": 0.0552, "norm_loss": 0.0, "num_token_doc": 66.4513, "num_token_overlap": 15.8196, "num_token_query": 42.39, "num_token_union": 68.3458, "num_word_context": 201.9947, "num_word_doc": 49.6245, "num_word_query": 32.0274, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1263.4939, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3467, "query_norm": 1.4036, "queue_k_norm": 1.4617, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.39, "sent_len_1": 66.4513, "sent_len_max_0": 127.995, "sent_len_max_1": 187.1163, "stdk": 0.0483, "stdq": 0.0444, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1419, "doc_norm": 1.4644, "encoder_q-embeddings": 844.8408, "encoder_q-layer.0": 535.8034, "encoder_q-layer.1": 612.5283, "encoder_q-layer.10": 603.3635, "encoder_q-layer.11": 1388.0229, "encoder_q-layer.2": 710.5567, "encoder_q-layer.3": 738.4174, "encoder_q-layer.4": 831.925, "encoder_q-layer.5": 863.1204, "encoder_q-layer.6": 899.0974, "encoder_q-layer.7": 788.3682, "encoder_q-layer.8": 801.2103, "encoder_q-layer.9": 608.6666, "epoch": 0.57, "inbatch_neg_score": 0.3529, "inbatch_pos_score": 1.0039, "learning_rate": 2.3000000000000003e-05, "loss": 3.1419, "norm_diff": 0.0486, "norm_loss": 0.0, "num_token_doc": 66.9326, "num_token_overlap": 15.8716, "num_token_query": 42.4793, "num_token_union": 68.6189, "num_word_context": 202.4258, "num_word_doc": 49.9692, "num_word_query": 32.104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1228.0466, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3511, "query_norm": 1.4158, "queue_k_norm": 1.464, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4793, "sent_len_1": 66.9326, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4725, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 52.4414, "active_queue_size": 16384.0, "cl_loss": 3.157, "doc_norm": 1.4642, "encoder_q-embeddings": 1027.639, "encoder_q-layer.0": 817.7075, "encoder_q-layer.1": 898.9025, "encoder_q-layer.10": 621.1924, "encoder_q-layer.11": 1395.8512, "encoder_q-layer.2": 1127.3192, "encoder_q-layer.3": 1169.6436, "encoder_q-layer.4": 1192.8859, "encoder_q-layer.5": 1186.7809, "encoder_q-layer.6": 1157.6691, "encoder_q-layer.7": 1070.0417, "encoder_q-layer.8": 984.0101, "encoder_q-layer.9": 661.2824, "epoch": 0.57, "inbatch_neg_score": 0.3555, "inbatch_pos_score": 1.0156, "learning_rate": 2.2944444444444446e-05, "loss": 3.157, "norm_diff": 0.0438, "norm_loss": 0.0, "num_token_doc": 67.0721, "num_token_overlap": 15.8729, "num_token_query": 42.3043, "num_token_union": 68.6415, "num_word_context": 202.5128, "num_word_doc": 50.0838, "num_word_query": 31.9714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1580.4939, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.356, "query_norm": 1.4204, "queue_k_norm": 1.4631, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3043, "sent_len_1": 67.0721, "sent_len_max_0": 127.995, "sent_len_max_1": 190.7875, "stdk": 0.0484, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.1506, "doc_norm": 1.4604, "encoder_q-embeddings": 584.0734, "encoder_q-layer.0": 390.8461, "encoder_q-layer.1": 424.2911, "encoder_q-layer.10": 544.3029, "encoder_q-layer.11": 1299.5098, "encoder_q-layer.2": 471.3191, "encoder_q-layer.3": 485.406, "encoder_q-layer.4": 506.4419, "encoder_q-layer.5": 526.6913, "encoder_q-layer.6": 563.9702, "encoder_q-layer.7": 606.1923, "encoder_q-layer.8": 673.9362, "encoder_q-layer.9": 575.0569, "epoch": 0.57, "inbatch_neg_score": 0.3555, "inbatch_pos_score": 1.0322, "learning_rate": 2.288888888888889e-05, "loss": 3.1506, "norm_diff": 0.0427, "norm_loss": 0.0, "num_token_doc": 67.0287, "num_token_overlap": 15.8057, "num_token_query": 42.3438, "num_token_union": 68.6509, "num_word_context": 202.4093, "num_word_doc": 49.9737, "num_word_query": 31.9852, "postclip_grad_norm": 1.0, "preclip_grad_norm": 930.6716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3555, "query_norm": 1.4176, "queue_k_norm": 1.4638, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3438, "sent_len_1": 67.0287, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.1863, "stdk": 0.0483, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1215, "doc_norm": 1.466, "encoder_q-embeddings": 650.7985, "encoder_q-layer.0": 435.1104, "encoder_q-layer.1": 460.6812, "encoder_q-layer.10": 561.2948, "encoder_q-layer.11": 1304.4774, "encoder_q-layer.2": 516.5103, "encoder_q-layer.3": 544.9128, "encoder_q-layer.4": 611.8788, "encoder_q-layer.5": 644.2945, "encoder_q-layer.6": 649.9128, "encoder_q-layer.7": 683.2017, "encoder_q-layer.8": 651.2993, "encoder_q-layer.9": 564.1862, "epoch": 0.58, "inbatch_neg_score": 0.3628, "inbatch_pos_score": 1.043, "learning_rate": 2.2833333333333334e-05, "loss": 3.1215, "norm_diff": 0.0361, "norm_loss": 0.0, "num_token_doc": 67.125, "num_token_overlap": 15.9165, "num_token_query": 42.4798, "num_token_union": 68.7207, "num_word_context": 202.7612, "num_word_doc": 50.092, "num_word_query": 32.1031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1002.102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3623, "query_norm": 1.4298, "queue_k_norm": 1.4663, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4798, "sent_len_1": 67.125, "sent_len_max_0": 128.0, "sent_len_max_1": 188.27, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1394, "doc_norm": 1.4673, "encoder_q-embeddings": 691.5981, "encoder_q-layer.0": 479.0529, "encoder_q-layer.1": 508.4531, "encoder_q-layer.10": 653.7751, "encoder_q-layer.11": 1365.5526, "encoder_q-layer.2": 581.9509, "encoder_q-layer.3": 604.7195, "encoder_q-layer.4": 662.1076, "encoder_q-layer.5": 623.8954, "encoder_q-layer.6": 582.7086, "encoder_q-layer.7": 648.0604, "encoder_q-layer.8": 676.7814, "encoder_q-layer.9": 565.108, "epoch": 0.58, "inbatch_neg_score": 0.3659, "inbatch_pos_score": 1.0596, "learning_rate": 2.277777777777778e-05, "loss": 3.1394, "norm_diff": 0.0383, "norm_loss": 0.0, "num_token_doc": 66.745, "num_token_overlap": 15.7785, "num_token_query": 42.1878, "num_token_union": 68.4435, "num_word_context": 202.3232, "num_word_doc": 49.8139, "num_word_query": 31.8722, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1057.8154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3657, "query_norm": 1.4289, "queue_k_norm": 1.4657, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1878, "sent_len_1": 66.745, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.1875, "stdk": 0.0485, "stdq": 0.0455, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.1618, "doc_norm": 1.4711, "encoder_q-embeddings": 604.1066, "encoder_q-layer.0": 399.4274, "encoder_q-layer.1": 431.4223, "encoder_q-layer.10": 582.1924, "encoder_q-layer.11": 1415.3114, "encoder_q-layer.2": 491.2623, "encoder_q-layer.3": 512.8324, "encoder_q-layer.4": 574.3021, "encoder_q-layer.5": 594.3817, "encoder_q-layer.6": 626.0992, "encoder_q-layer.7": 664.4054, "encoder_q-layer.8": 750.5299, "encoder_q-layer.9": 602.7731, "epoch": 0.58, "inbatch_neg_score": 0.3686, "inbatch_pos_score": 1.084, "learning_rate": 2.2722222222222222e-05, "loss": 3.1618, "norm_diff": 0.0487, "norm_loss": 0.0, "num_token_doc": 66.4902, "num_token_overlap": 15.7942, "num_token_query": 42.3254, "num_token_union": 68.3439, "num_word_context": 202.0266, "num_word_doc": 49.6414, "num_word_query": 31.9807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1014.7584, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3682, "query_norm": 1.4223, "queue_k_norm": 1.4649, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3254, "sent_len_1": 66.4902, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1712, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1534, "doc_norm": 1.4727, "encoder_q-embeddings": 543.4575, "encoder_q-layer.0": 344.8201, "encoder_q-layer.1": 386.4341, "encoder_q-layer.10": 609.3416, "encoder_q-layer.11": 1402.2313, "encoder_q-layer.2": 436.402, "encoder_q-layer.3": 454.7133, "encoder_q-layer.4": 465.2037, "encoder_q-layer.5": 477.2918, "encoder_q-layer.6": 547.3607, "encoder_q-layer.7": 600.8101, "encoder_q-layer.8": 687.8998, "encoder_q-layer.9": 603.1893, "epoch": 0.58, "inbatch_neg_score": 0.3721, "inbatch_pos_score": 1.0781, "learning_rate": 2.2666666666666668e-05, "loss": 3.1534, "norm_diff": 0.0519, "norm_loss": 0.0, "num_token_doc": 66.778, "num_token_overlap": 15.8499, "num_token_query": 42.3521, "num_token_union": 68.5172, "num_word_context": 202.5864, "num_word_doc": 49.8775, "num_word_query": 32.0329, "postclip_grad_norm": 1.0, "preclip_grad_norm": 945.0979, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3726, "query_norm": 1.4207, "queue_k_norm": 1.4687, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3521, "sent_len_1": 66.778, "sent_len_max_0": 128.0, "sent_len_max_1": 187.1562, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.123, "doc_norm": 1.4712, "encoder_q-embeddings": 1180.8032, "encoder_q-layer.0": 758.9208, "encoder_q-layer.1": 805.3209, "encoder_q-layer.10": 1280.4243, "encoder_q-layer.11": 2750.9189, "encoder_q-layer.2": 859.958, "encoder_q-layer.3": 862.4583, "encoder_q-layer.4": 906.7972, "encoder_q-layer.5": 938.0555, "encoder_q-layer.6": 976.0399, "encoder_q-layer.7": 1088.8998, "encoder_q-layer.8": 1255.5034, "encoder_q-layer.9": 1119.3949, "epoch": 0.58, "inbatch_neg_score": 0.3711, "inbatch_pos_score": 1.0322, "learning_rate": 2.2611111111111113e-05, "loss": 3.123, "norm_diff": 0.076, "norm_loss": 0.0, "num_token_doc": 66.8351, "num_token_overlap": 15.8794, "num_token_query": 42.467, "num_token_union": 68.5586, "num_word_context": 202.1198, "num_word_doc": 49.8663, "num_word_query": 32.0683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1887.4888, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3711, "query_norm": 1.3952, "queue_k_norm": 1.4679, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.467, "sent_len_1": 66.8351, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.2862, "stdk": 0.0486, "stdq": 0.0442, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1442, "doc_norm": 1.4721, "encoder_q-embeddings": 1500.991, "encoder_q-layer.0": 1018.4089, "encoder_q-layer.1": 1062.7791, "encoder_q-layer.10": 1199.5957, "encoder_q-layer.11": 2617.3306, "encoder_q-layer.2": 1158.5839, "encoder_q-layer.3": 1187.802, "encoder_q-layer.4": 1207.6298, "encoder_q-layer.5": 1168.2021, "encoder_q-layer.6": 1229.1863, "encoder_q-layer.7": 1295.8157, "encoder_q-layer.8": 1315.9121, "encoder_q-layer.9": 1130.7234, "epoch": 0.58, "inbatch_neg_score": 0.3801, "inbatch_pos_score": 1.0537, "learning_rate": 2.255555555555556e-05, "loss": 3.1442, "norm_diff": 0.0644, "norm_loss": 0.0, "num_token_doc": 66.8987, "num_token_overlap": 15.824, "num_token_query": 42.2264, "num_token_union": 68.4729, "num_word_context": 202.5402, "num_word_doc": 49.9193, "num_word_query": 31.9184, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2071.3239, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.377, "query_norm": 1.4078, "queue_k_norm": 1.4697, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2264, "sent_len_1": 66.8987, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4613, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.148, "doc_norm": 1.465, "encoder_q-embeddings": 1534.2682, "encoder_q-layer.0": 1022.4476, "encoder_q-layer.1": 1077.9333, "encoder_q-layer.10": 1306.9198, "encoder_q-layer.11": 2880.0291, "encoder_q-layer.2": 1266.7172, "encoder_q-layer.3": 1306.9546, "encoder_q-layer.4": 1424.8145, "encoder_q-layer.5": 1478.5464, "encoder_q-layer.6": 1531.9828, "encoder_q-layer.7": 1603.4641, "encoder_q-layer.8": 1478.9716, "encoder_q-layer.9": 1242.0193, "epoch": 0.58, "inbatch_neg_score": 0.3801, "inbatch_pos_score": 1.043, "learning_rate": 2.25e-05, "loss": 3.148, "norm_diff": 0.0458, "norm_loss": 0.0, "num_token_doc": 66.9317, "num_token_overlap": 15.8373, "num_token_query": 42.2325, "num_token_union": 68.5012, "num_word_context": 202.5582, "num_word_doc": 49.9689, "num_word_query": 31.9097, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2291.7764, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3792, "query_norm": 1.4192, "queue_k_norm": 1.4685, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2325, "sent_len_1": 66.9317, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.985, "stdk": 0.0482, "stdq": 0.0452, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.1437, "doc_norm": 1.4716, "encoder_q-embeddings": 1738.8499, "encoder_q-layer.0": 1187.1768, "encoder_q-layer.1": 1381.5208, "encoder_q-layer.10": 1113.1547, "encoder_q-layer.11": 2654.4512, "encoder_q-layer.2": 1626.7013, "encoder_q-layer.3": 1785.2411, "encoder_q-layer.4": 2033.6736, "encoder_q-layer.5": 2018.4711, "encoder_q-layer.6": 1812.6697, "encoder_q-layer.7": 1741.1794, "encoder_q-layer.8": 1630.3875, "encoder_q-layer.9": 1109.5233, "epoch": 0.58, "inbatch_neg_score": 0.3802, "inbatch_pos_score": 1.0742, "learning_rate": 2.2444444444444447e-05, "loss": 3.1437, "norm_diff": 0.0467, "norm_loss": 0.0, "num_token_doc": 66.5386, "num_token_overlap": 15.7536, "num_token_query": 42.2556, "num_token_union": 68.322, "num_word_context": 202.0279, "num_word_doc": 49.6177, "num_word_query": 31.9241, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2577.9146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3809, "query_norm": 1.4249, "queue_k_norm": 1.4709, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2556, "sent_len_1": 66.5386, "sent_len_max_0": 128.0, "sent_len_max_1": 190.24, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.1361, "doc_norm": 1.4704, "encoder_q-embeddings": 1079.5737, "encoder_q-layer.0": 706.8511, "encoder_q-layer.1": 754.1718, "encoder_q-layer.10": 1203.7465, "encoder_q-layer.11": 2670.8535, "encoder_q-layer.2": 862.9351, "encoder_q-layer.3": 892.3944, "encoder_q-layer.4": 935.8134, "encoder_q-layer.5": 976.3093, "encoder_q-layer.6": 1043.7396, "encoder_q-layer.7": 1158.5723, "encoder_q-layer.8": 1245.9496, "encoder_q-layer.9": 1153.3479, "epoch": 0.58, "inbatch_neg_score": 0.3912, "inbatch_pos_score": 1.0723, "learning_rate": 2.238888888888889e-05, "loss": 3.1361, "norm_diff": 0.0548, "norm_loss": 0.0, "num_token_doc": 66.6835, "num_token_overlap": 15.8487, "num_token_query": 42.3051, "num_token_union": 68.4206, "num_word_context": 202.1944, "num_word_doc": 49.7578, "num_word_query": 31.9259, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1867.0157, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3901, "query_norm": 1.4156, "queue_k_norm": 1.4719, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3051, "sent_len_1": 66.6835, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7537, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1307, "doc_norm": 1.4743, "encoder_q-embeddings": 1221.3959, "encoder_q-layer.0": 831.9175, "encoder_q-layer.1": 854.285, "encoder_q-layer.10": 1259.769, "encoder_q-layer.11": 2794.7605, "encoder_q-layer.2": 950.5015, "encoder_q-layer.3": 977.6024, "encoder_q-layer.4": 1017.9522, "encoder_q-layer.5": 1068.6313, "encoder_q-layer.6": 1153.9015, "encoder_q-layer.7": 1202.5565, "encoder_q-layer.8": 1349.9617, "encoder_q-layer.9": 1183.8074, "epoch": 0.58, "inbatch_neg_score": 0.3816, "inbatch_pos_score": 1.0654, "learning_rate": 2.2333333333333335e-05, "loss": 3.1307, "norm_diff": 0.051, "norm_loss": 0.0, "num_token_doc": 66.8267, "num_token_overlap": 15.8853, "num_token_query": 42.3824, "num_token_union": 68.5245, "num_word_context": 202.1196, "num_word_doc": 49.9197, "num_word_query": 32.002, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1978.7311, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3809, "query_norm": 1.4233, "queue_k_norm": 1.4748, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3824, "sent_len_1": 66.8267, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1598, "doc_norm": 1.4715, "encoder_q-embeddings": 1123.3894, "encoder_q-layer.0": 693.3016, "encoder_q-layer.1": 773.7617, "encoder_q-layer.10": 1270.5411, "encoder_q-layer.11": 2895.1294, "encoder_q-layer.2": 857.9244, "encoder_q-layer.3": 903.8174, "encoder_q-layer.4": 949.3602, "encoder_q-layer.5": 964.7598, "encoder_q-layer.6": 1060.8917, "encoder_q-layer.7": 1240.9342, "encoder_q-layer.8": 1394.2756, "encoder_q-layer.9": 1215.3698, "epoch": 0.58, "inbatch_neg_score": 0.3837, "inbatch_pos_score": 1.0342, "learning_rate": 2.2277777777777778e-05, "loss": 3.1598, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.7842, "num_token_overlap": 15.7844, "num_token_query": 42.2584, "num_token_union": 68.5084, "num_word_context": 202.6953, "num_word_doc": 49.8517, "num_word_query": 31.924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1956.8241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3838, "query_norm": 1.4131, "queue_k_norm": 1.4732, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2584, "sent_len_1": 66.7842, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4437, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.1287, "doc_norm": 1.4678, "encoder_q-embeddings": 1447.0443, "encoder_q-layer.0": 1010.4562, "encoder_q-layer.1": 1156.0873, "encoder_q-layer.10": 1184.3253, "encoder_q-layer.11": 2850.5215, "encoder_q-layer.2": 1535.4271, "encoder_q-layer.3": 1560.1089, "encoder_q-layer.4": 1604.0454, "encoder_q-layer.5": 1896.8475, "encoder_q-layer.6": 1792.3022, "encoder_q-layer.7": 1740.0215, "encoder_q-layer.8": 1494.2161, "encoder_q-layer.9": 1173.9042, "epoch": 0.59, "inbatch_neg_score": 0.3856, "inbatch_pos_score": 1.0557, "learning_rate": 2.2222222222222223e-05, "loss": 3.1287, "norm_diff": 0.0506, "norm_loss": 0.0, "num_token_doc": 66.8565, "num_token_overlap": 15.8385, "num_token_query": 42.4703, "num_token_union": 68.6237, "num_word_context": 202.3691, "num_word_doc": 49.8882, "num_word_query": 32.0994, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2458.5227, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3853, "query_norm": 1.4172, "queue_k_norm": 1.4754, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4703, "sent_len_1": 66.8565, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0325, "stdk": 0.0482, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 27.8304, "dev_samples_per_second": 2.3, "dev_steps_per_second": 0.036, "epoch": 0.59, "step": 60000, "test_accuracy": 93.9697265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.35770922899246216, "test_doc_norm": 1.4630223512649536, "test_inbatch_neg_score": 0.7574794292449951, "test_inbatch_pos_score": 1.698211908340454, "test_loss": 0.35770922899246216, "test_loss_align": 1.0607609748840332, "test_loss_unif": 3.698563575744629, "test_loss_unif_q@queue": 3.69856333732605, "test_norm_diff": 0.04816839098930359, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.38222119212150574, "test_query_norm": 1.51119065284729, "test_queue_k_norm": 1.475304365158081, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042704127728939056, "test_stdq": 0.04273711144924164, "test_stdqueue_k": 0.04858649522066116, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.8304, "dev_samples_per_second": 2.3, "dev_steps_per_second": 0.036, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.35885, "eval_beir-arguana_recall@10": 0.59744, "eval_beir-arguana_recall@100": 0.90256, "eval_beir-arguana_recall@20": 0.72617, "eval_beir-avg_ndcg@10": 0.36988525, "eval_beir-avg_recall@10": 0.43760766666666673, "eval_beir-avg_recall@100": 0.6172223333333334, "eval_beir-avg_recall@20": 0.4978096666666666, "eval_beir-cqadupstack_ndcg@10": 0.2593125, "eval_beir-cqadupstack_recall@10": 0.3489966666666667, "eval_beir-cqadupstack_recall@100": 0.5784333333333332, "eval_beir-cqadupstack_recall@20": 0.4165466666666667, "eval_beir-fiqa_ndcg@10": 0.23867, "eval_beir-fiqa_recall@10": 0.29981, "eval_beir-fiqa_recall@100": 0.55867, "eval_beir-fiqa_recall@20": 0.36346, "eval_beir-nfcorpus_ndcg@10": 0.28004, "eval_beir-nfcorpus_recall@10": 0.13428, "eval_beir-nfcorpus_recall@100": 0.27518, "eval_beir-nfcorpus_recall@20": 0.16669, "eval_beir-nq_ndcg@10": 0.25831, "eval_beir-nq_recall@10": 0.42391, "eval_beir-nq_recall@100": 0.76159, "eval_beir-nq_recall@20": 0.54203, "eval_beir-quora_ndcg@10": 0.76827, "eval_beir-quora_recall@10": 0.88062, "eval_beir-quora_recall@100": 0.97562, "eval_beir-quora_recall@20": 0.92356, "eval_beir-scidocs_ndcg@10": 0.14504, "eval_beir-scidocs_recall@10": 0.15428, "eval_beir-scidocs_recall@100": 0.35145, "eval_beir-scidocs_recall@20": 0.21302, "eval_beir-scifact_ndcg@10": 0.63571, "eval_beir-scifact_recall@10": 0.79678, "eval_beir-scifact_recall@100": 0.90256, "eval_beir-scifact_recall@20": 0.84856, "eval_beir-trec-covid_ndcg@10": 0.55144, "eval_beir-trec-covid_recall@10": 0.598, "eval_beir-trec-covid_recall@100": 0.4444, "eval_beir-trec-covid_recall@20": 0.57, "eval_beir-webis-touche2020_ndcg@10": 0.20321, "eval_beir-webis-touche2020_recall@10": 0.14196, "eval_beir-webis-touche2020_recall@100": 0.42176, "eval_beir-webis-touche2020_recall@20": 0.20806, "eval_senteval-avg_sts": 0.7571115575523152, "eval_senteval-sickr_spearman": 0.7200544147213508, "eval_senteval-stsb_spearman": 0.7941687003832796, "step": 60000, "test_accuracy": 93.9697265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.35770922899246216, "test_doc_norm": 1.4630223512649536, "test_inbatch_neg_score": 0.7574794292449951, "test_inbatch_pos_score": 1.698211908340454, "test_loss": 0.35770922899246216, "test_loss_align": 1.0607609748840332, "test_loss_unif": 3.698563575744629, "test_loss_unif_q@queue": 3.69856333732605, "test_norm_diff": 0.04816839098930359, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.38222119212150574, "test_query_norm": 1.51119065284729, "test_queue_k_norm": 1.475304365158081, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042704127728939056, "test_stdq": 0.04273711144924164, "test_stdqueue_k": 0.04858649522066116, "test_stdqueue_q": 0.0 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.116, "doc_norm": 1.4789, "encoder_q-embeddings": 921.3102, "encoder_q-layer.0": 609.9509, "encoder_q-layer.1": 645.2338, "encoder_q-layer.10": 1249.0197, "encoder_q-layer.11": 2780.7913, "encoder_q-layer.2": 729.0053, "encoder_q-layer.3": 764.1378, "encoder_q-layer.4": 797.0485, "encoder_q-layer.5": 847.021, "encoder_q-layer.6": 954.3829, "encoder_q-layer.7": 1122.9921, "encoder_q-layer.8": 1286.472, "encoder_q-layer.9": 1177.5739, "epoch": 0.59, "inbatch_neg_score": 0.3903, "inbatch_pos_score": 1.0732, "learning_rate": 2.216666666666667e-05, "loss": 3.116, "norm_diff": 0.0504, "norm_loss": 0.0, "num_token_doc": 66.729, "num_token_overlap": 15.8519, "num_token_query": 42.3984, "num_token_union": 68.5018, "num_word_context": 202.3758, "num_word_doc": 49.7816, "num_word_query": 32.0165, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1772.2699, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3892, "query_norm": 1.4285, "queue_k_norm": 1.4756, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3984, "sent_len_1": 66.729, "sent_len_max_0": 128.0, "sent_len_max_1": 188.785, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1323, "doc_norm": 1.4818, "encoder_q-embeddings": 983.8351, "encoder_q-layer.0": 633.3767, "encoder_q-layer.1": 668.1885, "encoder_q-layer.10": 1129.8862, "encoder_q-layer.11": 2674.4102, "encoder_q-layer.2": 776.9865, "encoder_q-layer.3": 812.1667, "encoder_q-layer.4": 868.5258, "encoder_q-layer.5": 875.4271, "encoder_q-layer.6": 937.1874, "encoder_q-layer.7": 1054.7651, "encoder_q-layer.8": 1250.7714, "encoder_q-layer.9": 1140.7731, "epoch": 0.59, "inbatch_neg_score": 0.3855, "inbatch_pos_score": 1.0801, "learning_rate": 2.211111111111111e-05, "loss": 3.1323, "norm_diff": 0.0394, "norm_loss": 0.0, "num_token_doc": 66.6803, "num_token_overlap": 15.7904, "num_token_query": 42.2919, "num_token_union": 68.4434, "num_word_context": 202.2782, "num_word_doc": 49.7345, "num_word_query": 31.9417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1787.1358, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3853, "query_norm": 1.4424, "queue_k_norm": 1.4779, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2919, "sent_len_1": 66.6803, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7138, "stdk": 0.0487, "stdq": 0.0459, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1341, "doc_norm": 1.478, "encoder_q-embeddings": 1410.4026, "encoder_q-layer.0": 949.7896, "encoder_q-layer.1": 1038.9851, "encoder_q-layer.10": 1299.3206, "encoder_q-layer.11": 2848.437, "encoder_q-layer.2": 1169.8823, "encoder_q-layer.3": 1263.2292, "encoder_q-layer.4": 1349.859, "encoder_q-layer.5": 1318.0902, "encoder_q-layer.6": 1499.2507, "encoder_q-layer.7": 1713.886, "encoder_q-layer.8": 1676.5989, "encoder_q-layer.9": 1319.0914, "epoch": 0.59, "inbatch_neg_score": 0.3861, "inbatch_pos_score": 1.0684, "learning_rate": 2.2055555555555557e-05, "loss": 3.1341, "norm_diff": 0.0607, "norm_loss": 0.0, "num_token_doc": 66.8057, "num_token_overlap": 15.778, "num_token_query": 42.3692, "num_token_union": 68.5518, "num_word_context": 202.6482, "num_word_doc": 49.8869, "num_word_query": 32.0169, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2276.7475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3853, "query_norm": 1.4173, "queue_k_norm": 1.4784, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3692, "sent_len_1": 66.8057, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.29, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1226, "doc_norm": 1.4812, "encoder_q-embeddings": 1965.8098, "encoder_q-layer.0": 1271.2793, "encoder_q-layer.1": 1381.9326, "encoder_q-layer.10": 1415.3666, "encoder_q-layer.11": 2834.2854, "encoder_q-layer.2": 1710.9971, "encoder_q-layer.3": 1831.463, "encoder_q-layer.4": 2134.1191, "encoder_q-layer.5": 2309.6799, "encoder_q-layer.6": 2587.5498, "encoder_q-layer.7": 2527.9458, "encoder_q-layer.8": 2215.8188, "encoder_q-layer.9": 1322.3284, "epoch": 0.59, "inbatch_neg_score": 0.3838, "inbatch_pos_score": 1.0664, "learning_rate": 2.2000000000000003e-05, "loss": 3.1226, "norm_diff": 0.0721, "norm_loss": 0.0, "num_token_doc": 66.6826, "num_token_overlap": 15.9014, "num_token_query": 42.4389, "num_token_union": 68.3978, "num_word_context": 202.1687, "num_word_doc": 49.7499, "num_word_query": 32.0414, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2984.967, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3848, "query_norm": 1.4091, "queue_k_norm": 1.4764, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4389, "sent_len_1": 66.6826, "sent_len_max_0": 127.995, "sent_len_max_1": 190.2887, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.1367, "doc_norm": 1.4824, "encoder_q-embeddings": 1066.7395, "encoder_q-layer.0": 704.1894, "encoder_q-layer.1": 779.001, "encoder_q-layer.10": 1192.0182, "encoder_q-layer.11": 2736.002, "encoder_q-layer.2": 869.0217, "encoder_q-layer.3": 898.8972, "encoder_q-layer.4": 970.499, "encoder_q-layer.5": 974.1486, "encoder_q-layer.6": 1086.6321, "encoder_q-layer.7": 1101.6938, "encoder_q-layer.8": 1245.7023, "encoder_q-layer.9": 1123.6653, "epoch": 0.59, "inbatch_neg_score": 0.3849, "inbatch_pos_score": 1.0869, "learning_rate": 2.1944444444444445e-05, "loss": 3.1367, "norm_diff": 0.0634, "norm_loss": 0.0, "num_token_doc": 66.7251, "num_token_overlap": 15.8076, "num_token_query": 42.2717, "num_token_union": 68.4804, "num_word_context": 202.2246, "num_word_doc": 49.8238, "num_word_query": 31.9272, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1852.9287, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.385, "query_norm": 1.419, "queue_k_norm": 1.4778, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2717, "sent_len_1": 66.7251, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8587, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1345, "doc_norm": 1.4747, "encoder_q-embeddings": 1122.4907, "encoder_q-layer.0": 743.1008, "encoder_q-layer.1": 827.9894, "encoder_q-layer.10": 1175.3248, "encoder_q-layer.11": 2632.2825, "encoder_q-layer.2": 937.0531, "encoder_q-layer.3": 964.6293, "encoder_q-layer.4": 1025.1826, "encoder_q-layer.5": 969.2058, "encoder_q-layer.6": 1051.3893, "encoder_q-layer.7": 1125.3395, "encoder_q-layer.8": 1259.2076, "encoder_q-layer.9": 1150.8652, "epoch": 0.59, "inbatch_neg_score": 0.3924, "inbatch_pos_score": 1.0625, "learning_rate": 2.188888888888889e-05, "loss": 3.1345, "norm_diff": 0.0696, "norm_loss": 0.0, "num_token_doc": 66.9153, "num_token_overlap": 15.7742, "num_token_query": 42.1498, "num_token_union": 68.4778, "num_word_context": 202.3756, "num_word_doc": 49.9419, "num_word_query": 31.8144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1849.2706, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3909, "query_norm": 1.4051, "queue_k_norm": 1.4792, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1498, "sent_len_1": 66.9153, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1012, "stdk": 0.0483, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.1077, "doc_norm": 1.4744, "encoder_q-embeddings": 971.3138, "encoder_q-layer.0": 662.8887, "encoder_q-layer.1": 713.1592, "encoder_q-layer.10": 1228.3376, "encoder_q-layer.11": 2811.3088, "encoder_q-layer.2": 791.0277, "encoder_q-layer.3": 822.4453, "encoder_q-layer.4": 896.6046, "encoder_q-layer.5": 895.9908, "encoder_q-layer.6": 1031.1925, "encoder_q-layer.7": 1169.3915, "encoder_q-layer.8": 1354.0073, "encoder_q-layer.9": 1226.4622, "epoch": 0.59, "inbatch_neg_score": 0.3889, "inbatch_pos_score": 1.042, "learning_rate": 2.1833333333333333e-05, "loss": 3.1077, "norm_diff": 0.0759, "norm_loss": 0.0, "num_token_doc": 67.0449, "num_token_overlap": 15.9141, "num_token_query": 42.4388, "num_token_union": 68.6549, "num_word_context": 202.3037, "num_word_doc": 50.0503, "num_word_query": 32.0472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1864.9631, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3872, "query_norm": 1.3984, "queue_k_norm": 1.4813, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4388, "sent_len_1": 67.0449, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1413, "stdk": 0.0482, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1366, "doc_norm": 1.4734, "encoder_q-embeddings": 1181.3865, "encoder_q-layer.0": 807.902, "encoder_q-layer.1": 894.5114, "encoder_q-layer.10": 1262.3809, "encoder_q-layer.11": 2794.5837, "encoder_q-layer.2": 999.9335, "encoder_q-layer.3": 1033.3374, "encoder_q-layer.4": 1072.7749, "encoder_q-layer.5": 1115.9156, "encoder_q-layer.6": 1199.9437, "encoder_q-layer.7": 1338.632, "encoder_q-layer.8": 1465.0558, "encoder_q-layer.9": 1216.7133, "epoch": 0.59, "inbatch_neg_score": 0.3773, "inbatch_pos_score": 1.0615, "learning_rate": 2.177777777777778e-05, "loss": 3.1366, "norm_diff": 0.0598, "norm_loss": 0.0, "num_token_doc": 66.6286, "num_token_overlap": 15.7364, "num_token_query": 42.2696, "num_token_union": 68.4269, "num_word_context": 202.1377, "num_word_doc": 49.7322, "num_word_query": 31.9254, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2001.155, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.377, "query_norm": 1.4136, "queue_k_norm": 1.4791, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2696, "sent_len_1": 66.6286, "sent_len_max_0": 127.995, "sent_len_max_1": 187.3413, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1258, "doc_norm": 1.4793, "encoder_q-embeddings": 1810.8923, "encoder_q-layer.0": 1247.7666, "encoder_q-layer.1": 1427.9204, "encoder_q-layer.10": 1251.2736, "encoder_q-layer.11": 2810.8123, "encoder_q-layer.2": 1564.7219, "encoder_q-layer.3": 1671.3445, "encoder_q-layer.4": 1648.9038, "encoder_q-layer.5": 1675.5657, "encoder_q-layer.6": 1749.5154, "encoder_q-layer.7": 1677.0615, "encoder_q-layer.8": 1598.3628, "encoder_q-layer.9": 1294.3478, "epoch": 0.59, "inbatch_neg_score": 0.3845, "inbatch_pos_score": 1.0723, "learning_rate": 2.1722222222222225e-05, "loss": 3.1258, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.9477, "num_token_overlap": 15.8046, "num_token_query": 42.2518, "num_token_union": 68.5976, "num_word_context": 202.5164, "num_word_doc": 49.9902, "num_word_query": 31.9122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2510.0129, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3833, "query_norm": 1.4151, "queue_k_norm": 1.4812, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2518, "sent_len_1": 66.9477, "sent_len_max_0": 128.0, "sent_len_max_1": 187.1238, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1394, "doc_norm": 1.4787, "encoder_q-embeddings": 1378.7579, "encoder_q-layer.0": 952.5486, "encoder_q-layer.1": 1062.9498, "encoder_q-layer.10": 1300.1671, "encoder_q-layer.11": 2831.9504, "encoder_q-layer.2": 1169.2241, "encoder_q-layer.3": 1192.8132, "encoder_q-layer.4": 1200.5781, "encoder_q-layer.5": 1225.3535, "encoder_q-layer.6": 1309.9116, "encoder_q-layer.7": 1431.7555, "encoder_q-layer.8": 1459.2977, "encoder_q-layer.9": 1183.3984, "epoch": 0.6, "inbatch_neg_score": 0.3899, "inbatch_pos_score": 1.0605, "learning_rate": 2.1666666666666667e-05, "loss": 3.1394, "norm_diff": 0.0631, "norm_loss": 0.0, "num_token_doc": 66.7255, "num_token_overlap": 15.8274, "num_token_query": 42.3531, "num_token_union": 68.4761, "num_word_context": 202.629, "num_word_doc": 49.8217, "num_word_query": 32.0124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2173.3716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3887, "query_norm": 1.4155, "queue_k_norm": 1.4791, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3531, "sent_len_1": 66.7255, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.5475, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.1353, "doc_norm": 1.4865, "encoder_q-embeddings": 1249.4114, "encoder_q-layer.0": 752.5885, "encoder_q-layer.1": 828.8663, "encoder_q-layer.10": 1304.1537, "encoder_q-layer.11": 2738.0923, "encoder_q-layer.2": 948.2084, "encoder_q-layer.3": 1014.5893, "encoder_q-layer.4": 1102.8193, "encoder_q-layer.5": 1192.7697, "encoder_q-layer.6": 1323.7754, "encoder_q-layer.7": 1423.0687, "encoder_q-layer.8": 1481.6466, "encoder_q-layer.9": 1242.8262, "epoch": 0.6, "inbatch_neg_score": 0.3869, "inbatch_pos_score": 1.0947, "learning_rate": 2.1611111111111113e-05, "loss": 3.1353, "norm_diff": 0.0511, "norm_loss": 0.0, "num_token_doc": 66.6523, "num_token_overlap": 15.7978, "num_token_query": 42.3466, "num_token_union": 68.4006, "num_word_context": 202.3305, "num_word_doc": 49.7171, "num_word_query": 31.9827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.4306, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3872, "query_norm": 1.4354, "queue_k_norm": 1.4793, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3466, "sent_len_1": 66.6523, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.4238, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.1251, "doc_norm": 1.4786, "encoder_q-embeddings": 1003.9501, "encoder_q-layer.0": 657.9818, "encoder_q-layer.1": 687.0838, "encoder_q-layer.10": 1207.1991, "encoder_q-layer.11": 2696.4663, "encoder_q-layer.2": 787.9031, "encoder_q-layer.3": 814.0614, "encoder_q-layer.4": 882.1903, "encoder_q-layer.5": 889.8879, "encoder_q-layer.6": 952.4977, "encoder_q-layer.7": 1045.9333, "encoder_q-layer.8": 1216.2877, "encoder_q-layer.9": 1103.3518, "epoch": 0.6, "inbatch_neg_score": 0.3864, "inbatch_pos_score": 1.0566, "learning_rate": 2.1555555555555555e-05, "loss": 3.1251, "norm_diff": 0.0674, "norm_loss": 0.0, "num_token_doc": 66.6289, "num_token_overlap": 15.7464, "num_token_query": 42.2607, "num_token_union": 68.4433, "num_word_context": 202.308, "num_word_doc": 49.7405, "num_word_query": 31.9316, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1776.6999, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3865, "query_norm": 1.4112, "queue_k_norm": 1.4822, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2607, "sent_len_1": 66.6289, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0712, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 3.1296, "doc_norm": 1.4843, "encoder_q-embeddings": 2254.3867, "encoder_q-layer.0": 1439.509, "encoder_q-layer.1": 1634.9144, "encoder_q-layer.10": 2277.0359, "encoder_q-layer.11": 5222.5254, "encoder_q-layer.2": 1833.2247, "encoder_q-layer.3": 1921.701, "encoder_q-layer.4": 2079.2703, "encoder_q-layer.5": 2218.583, "encoder_q-layer.6": 2323.4517, "encoder_q-layer.7": 2437.5322, "encoder_q-layer.8": 2648.1516, "encoder_q-layer.9": 2258.0693, "epoch": 0.6, "inbatch_neg_score": 0.3898, "inbatch_pos_score": 1.1016, "learning_rate": 2.15e-05, "loss": 3.1296, "norm_diff": 0.0638, "norm_loss": 0.0, "num_token_doc": 66.6399, "num_token_overlap": 15.8469, "num_token_query": 42.37, "num_token_union": 68.4362, "num_word_context": 202.0443, "num_word_doc": 49.7736, "num_word_query": 32.0012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3769.2084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3882, "query_norm": 1.4205, "queue_k_norm": 1.4821, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.37, "sent_len_1": 66.6399, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.6562, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.1338, "doc_norm": 1.4819, "encoder_q-embeddings": 2201.1428, "encoder_q-layer.0": 1425.5283, "encoder_q-layer.1": 1514.8804, "encoder_q-layer.10": 2730.4133, "encoder_q-layer.11": 5747.6011, "encoder_q-layer.2": 1683.348, "encoder_q-layer.3": 1751.8839, "encoder_q-layer.4": 1980.8741, "encoder_q-layer.5": 1960.1493, "encoder_q-layer.6": 2183.5862, "encoder_q-layer.7": 2446.562, "encoder_q-layer.8": 2674.3408, "encoder_q-layer.9": 2393.7551, "epoch": 0.6, "inbatch_neg_score": 0.4006, "inbatch_pos_score": 1.0635, "learning_rate": 2.1444444444444443e-05, "loss": 3.1338, "norm_diff": 0.0529, "norm_loss": 0.0, "num_token_doc": 66.8462, "num_token_overlap": 15.8438, "num_token_query": 42.382, "num_token_union": 68.5575, "num_word_context": 202.1623, "num_word_doc": 49.876, "num_word_query": 31.9927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3915.2301, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3999, "query_norm": 1.4291, "queue_k_norm": 1.4814, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.382, "sent_len_1": 66.8462, "sent_len_max_0": 127.995, "sent_len_max_1": 189.635, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.1458, "doc_norm": 1.487, "encoder_q-embeddings": 2626.884, "encoder_q-layer.0": 1794.2252, "encoder_q-layer.1": 1923.1685, "encoder_q-layer.10": 2338.4321, "encoder_q-layer.11": 5452.2783, "encoder_q-layer.2": 2319.646, "encoder_q-layer.3": 2416.6338, "encoder_q-layer.4": 2491.7173, "encoder_q-layer.5": 2675.1184, "encoder_q-layer.6": 2948.3755, "encoder_q-layer.7": 2885.6921, "encoder_q-layer.8": 2971.7102, "encoder_q-layer.9": 2390.8477, "epoch": 0.6, "inbatch_neg_score": 0.3956, "inbatch_pos_score": 1.0645, "learning_rate": 2.138888888888889e-05, "loss": 3.1458, "norm_diff": 0.06, "norm_loss": 0.0, "num_token_doc": 66.7003, "num_token_overlap": 15.8173, "num_token_query": 42.312, "num_token_union": 68.4272, "num_word_context": 202.4464, "num_word_doc": 49.7589, "num_word_query": 31.9685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4201.7478, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.397, "query_norm": 1.427, "queue_k_norm": 1.4808, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.312, "sent_len_1": 66.7003, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.075, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1121, "doc_norm": 1.4765, "encoder_q-embeddings": 2704.0317, "encoder_q-layer.0": 1760.5591, "encoder_q-layer.1": 1906.1226, "encoder_q-layer.10": 2464.0554, "encoder_q-layer.11": 5455.5742, "encoder_q-layer.2": 2204.7573, "encoder_q-layer.3": 2327.457, "encoder_q-layer.4": 2590.4641, "encoder_q-layer.5": 2539.4819, "encoder_q-layer.6": 2830.5588, "encoder_q-layer.7": 2740.0908, "encoder_q-layer.8": 2815.8342, "encoder_q-layer.9": 2485.8496, "epoch": 0.6, "inbatch_neg_score": 0.3985, "inbatch_pos_score": 1.0732, "learning_rate": 2.1333333333333335e-05, "loss": 3.1121, "norm_diff": 0.0518, "norm_loss": 0.0, "num_token_doc": 66.8233, "num_token_overlap": 15.8075, "num_token_query": 42.2217, "num_token_union": 68.4562, "num_word_context": 202.2452, "num_word_doc": 49.8666, "num_word_query": 31.9013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4230.7667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3984, "query_norm": 1.4247, "queue_k_norm": 1.4819, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2217, "sent_len_1": 66.8233, "sent_len_max_0": 128.0, "sent_len_max_1": 189.67, "stdk": 0.0482, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1285, "doc_norm": 1.4849, "encoder_q-embeddings": 2013.0703, "encoder_q-layer.0": 1352.4456, "encoder_q-layer.1": 1446.9351, "encoder_q-layer.10": 2304.8872, "encoder_q-layer.11": 5376.4106, "encoder_q-layer.2": 1606.6493, "encoder_q-layer.3": 1682.8158, "encoder_q-layer.4": 1706.3035, "encoder_q-layer.5": 1772.8049, "encoder_q-layer.6": 1970.4766, "encoder_q-layer.7": 2206.3545, "encoder_q-layer.8": 2517.2532, "encoder_q-layer.9": 2271.4294, "epoch": 0.6, "inbatch_neg_score": 0.4006, "inbatch_pos_score": 1.0869, "learning_rate": 2.127777777777778e-05, "loss": 3.1285, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.8703, "num_token_overlap": 15.827, "num_token_query": 42.2577, "num_token_union": 68.514, "num_word_context": 202.1993, "num_word_doc": 49.8915, "num_word_query": 31.924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3627.8402, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4001, "query_norm": 1.4369, "queue_k_norm": 1.4825, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2577, "sent_len_1": 66.8703, "sent_len_max_0": 128.0, "sent_len_max_1": 189.325, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1149, "doc_norm": 1.4815, "encoder_q-embeddings": 3161.6145, "encoder_q-layer.0": 2210.2009, "encoder_q-layer.1": 2477.6467, "encoder_q-layer.10": 2316.5652, "encoder_q-layer.11": 5580.792, "encoder_q-layer.2": 3022.2927, "encoder_q-layer.3": 3271.2898, "encoder_q-layer.4": 3537.4102, "encoder_q-layer.5": 3636.2478, "encoder_q-layer.6": 3507.6348, "encoder_q-layer.7": 3176.8999, "encoder_q-layer.8": 2779.3254, "encoder_q-layer.9": 2320.6792, "epoch": 0.6, "inbatch_neg_score": 0.4036, "inbatch_pos_score": 1.0645, "learning_rate": 2.1222222222222223e-05, "loss": 3.1149, "norm_diff": 0.0558, "norm_loss": 0.0, "num_token_doc": 66.9599, "num_token_overlap": 15.8895, "num_token_query": 42.4554, "num_token_union": 68.685, "num_word_context": 202.4929, "num_word_doc": 50.0101, "num_word_query": 32.0882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4908.6053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4023, "query_norm": 1.4257, "queue_k_norm": 1.4858, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4554, "sent_len_1": 66.9599, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.3438, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 3.1306, "doc_norm": 1.4862, "encoder_q-embeddings": 2062.3018, "encoder_q-layer.0": 1427.3359, "encoder_q-layer.1": 1505.9861, "encoder_q-layer.10": 2425.2651, "encoder_q-layer.11": 5310.7764, "encoder_q-layer.2": 1671.771, "encoder_q-layer.3": 1700.673, "encoder_q-layer.4": 1845.3536, "encoder_q-layer.5": 1877.0127, "encoder_q-layer.6": 2090.6584, "encoder_q-layer.7": 2285.4443, "encoder_q-layer.8": 2581.0481, "encoder_q-layer.9": 2329.5474, "epoch": 0.6, "inbatch_neg_score": 0.4036, "inbatch_pos_score": 1.1006, "learning_rate": 2.116666666666667e-05, "loss": 3.1306, "norm_diff": 0.0523, "norm_loss": 0.0, "num_token_doc": 66.9893, "num_token_overlap": 15.8406, "num_token_query": 42.349, "num_token_union": 68.6027, "num_word_context": 202.5876, "num_word_doc": 50.0056, "num_word_query": 31.9844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3651.2171, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4048, "query_norm": 1.4339, "queue_k_norm": 1.4836, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.349, "sent_len_1": 66.9893, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9538, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1329, "doc_norm": 1.4814, "encoder_q-embeddings": 2194.9619, "encoder_q-layer.0": 1401.8438, "encoder_q-layer.1": 1503.8503, "encoder_q-layer.10": 2411.2786, "encoder_q-layer.11": 5372.3301, "encoder_q-layer.2": 1706.163, "encoder_q-layer.3": 1735.3887, "encoder_q-layer.4": 1825.1997, "encoder_q-layer.5": 1892.4075, "encoder_q-layer.6": 2106.469, "encoder_q-layer.7": 2269.6035, "encoder_q-layer.8": 2546.7664, "encoder_q-layer.9": 2252.3213, "epoch": 0.61, "inbatch_neg_score": 0.4127, "inbatch_pos_score": 1.0908, "learning_rate": 2.111111111111111e-05, "loss": 3.1329, "norm_diff": 0.0466, "norm_loss": 0.0, "num_token_doc": 66.5831, "num_token_overlap": 15.773, "num_token_query": 42.294, "num_token_union": 68.4363, "num_word_context": 202.4425, "num_word_doc": 49.7005, "num_word_query": 31.9216, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3696.6351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4119, "query_norm": 1.4348, "queue_k_norm": 1.4843, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.294, "sent_len_1": 66.5831, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5375, "stdk": 0.0484, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1231, "doc_norm": 1.4851, "encoder_q-embeddings": 2279.9243, "encoder_q-layer.0": 1424.1184, "encoder_q-layer.1": 1473.8896, "encoder_q-layer.10": 2334.2002, "encoder_q-layer.11": 5346.1313, "encoder_q-layer.2": 1679.519, "encoder_q-layer.3": 1754.1149, "encoder_q-layer.4": 1803.0732, "encoder_q-layer.5": 1818.7891, "encoder_q-layer.6": 2054.3608, "encoder_q-layer.7": 2195.3074, "encoder_q-layer.8": 2487.6006, "encoder_q-layer.9": 2258.9368, "epoch": 0.61, "inbatch_neg_score": 0.4152, "inbatch_pos_score": 1.0957, "learning_rate": 2.1055555555555556e-05, "loss": 3.1231, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.6205, "num_token_overlap": 15.7941, "num_token_query": 42.188, "num_token_union": 68.3298, "num_word_context": 201.6357, "num_word_doc": 49.7581, "num_word_query": 31.8892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3730.8354, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.415, "query_norm": 1.4294, "queue_k_norm": 1.4847, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.188, "sent_len_1": 66.6205, "sent_len_max_0": 128.0, "sent_len_max_1": 186.82, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 53.418, "active_queue_size": 16384.0, "cl_loss": 3.1396, "doc_norm": 1.492, "encoder_q-embeddings": 2665.0854, "encoder_q-layer.0": 1788.3734, "encoder_q-layer.1": 1938.8064, "encoder_q-layer.10": 2573.1067, "encoder_q-layer.11": 5768.9912, "encoder_q-layer.2": 2210.9707, "encoder_q-layer.3": 2447.8586, "encoder_q-layer.4": 2596.6084, "encoder_q-layer.5": 2638.0085, "encoder_q-layer.6": 2704.9375, "encoder_q-layer.7": 2947.4355, "encoder_q-layer.8": 3462.2712, "encoder_q-layer.9": 2908.3755, "epoch": 0.61, "inbatch_neg_score": 0.4112, "inbatch_pos_score": 1.0859, "learning_rate": 2.1e-05, "loss": 3.1396, "norm_diff": 0.0697, "norm_loss": 0.0, "num_token_doc": 66.8798, "num_token_overlap": 15.8164, "num_token_query": 42.4668, "num_token_union": 68.5939, "num_word_context": 202.6312, "num_word_doc": 49.9137, "num_word_query": 32.1, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4476.5295, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4121, "query_norm": 1.4223, "queue_k_norm": 1.4864, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4668, "sent_len_1": 66.8798, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0513, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1424, "doc_norm": 1.4813, "encoder_q-embeddings": 4504.125, "encoder_q-layer.0": 3034.1355, "encoder_q-layer.1": 3232.821, "encoder_q-layer.10": 2357.9849, "encoder_q-layer.11": 5282.3774, "encoder_q-layer.2": 3546.9973, "encoder_q-layer.3": 3777.6196, "encoder_q-layer.4": 4480.0991, "encoder_q-layer.5": 4652.2622, "encoder_q-layer.6": 4884.0654, "encoder_q-layer.7": 5353.1641, "encoder_q-layer.8": 4816.4976, "encoder_q-layer.9": 3023.562, "epoch": 0.61, "inbatch_neg_score": 0.4167, "inbatch_pos_score": 1.0869, "learning_rate": 2.0944444444444445e-05, "loss": 3.1424, "norm_diff": 0.0551, "norm_loss": 0.0, "num_token_doc": 66.6122, "num_token_overlap": 15.7243, "num_token_query": 42.1173, "num_token_union": 68.3307, "num_word_context": 202.2125, "num_word_doc": 49.7372, "num_word_query": 31.7933, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6203.8799, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4163, "query_norm": 1.4262, "queue_k_norm": 1.4862, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1173, "sent_len_1": 66.6122, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.86, "stdk": 0.0483, "stdq": 0.0448, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.1271, "doc_norm": 1.4862, "encoder_q-embeddings": 2730.1438, "encoder_q-layer.0": 1882.0305, "encoder_q-layer.1": 2132.2246, "encoder_q-layer.10": 2488.3479, "encoder_q-layer.11": 5681.1562, "encoder_q-layer.2": 2479.8269, "encoder_q-layer.3": 2581.3901, "encoder_q-layer.4": 2744.1274, "encoder_q-layer.5": 2665.9319, "encoder_q-layer.6": 2711.8315, "encoder_q-layer.7": 2907.6978, "encoder_q-layer.8": 3173.9517, "encoder_q-layer.9": 2589.9028, "epoch": 0.61, "inbatch_neg_score": 0.4105, "inbatch_pos_score": 1.0879, "learning_rate": 2.088888888888889e-05, "loss": 3.1271, "norm_diff": 0.0512, "norm_loss": 0.0, "num_token_doc": 66.8283, "num_token_overlap": 15.818, "num_token_query": 42.178, "num_token_union": 68.3973, "num_word_context": 202.2316, "num_word_doc": 49.9192, "num_word_query": 31.8532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4449.1455, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4104, "query_norm": 1.435, "queue_k_norm": 1.4859, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.178, "sent_len_1": 66.8283, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9162, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1494, "doc_norm": 1.4878, "encoder_q-embeddings": 2373.4712, "encoder_q-layer.0": 1603.9376, "encoder_q-layer.1": 1736.8402, "encoder_q-layer.10": 2401.5557, "encoder_q-layer.11": 5397.5605, "encoder_q-layer.2": 2077.3113, "encoder_q-layer.3": 2228.6458, "encoder_q-layer.4": 2400.2317, "encoder_q-layer.5": 2488.0115, "encoder_q-layer.6": 2688.8318, "encoder_q-layer.7": 2665.5161, "encoder_q-layer.8": 3001.6619, "encoder_q-layer.9": 2467.241, "epoch": 0.61, "inbatch_neg_score": 0.4139, "inbatch_pos_score": 1.1006, "learning_rate": 2.0833333333333336e-05, "loss": 3.1494, "norm_diff": 0.0657, "norm_loss": 0.0, "num_token_doc": 66.4813, "num_token_overlap": 15.7582, "num_token_query": 42.3351, "num_token_union": 68.3993, "num_word_context": 202.3559, "num_word_doc": 49.6077, "num_word_query": 31.9898, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4063.6973, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4146, "query_norm": 1.4221, "queue_k_norm": 1.4861, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3351, "sent_len_1": 66.4813, "sent_len_max_0": 128.0, "sent_len_max_1": 190.36, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1335, "doc_norm": 1.4838, "encoder_q-embeddings": 1918.8857, "encoder_q-layer.0": 1278.2261, "encoder_q-layer.1": 1356.3767, "encoder_q-layer.10": 2247.5251, "encoder_q-layer.11": 5365.2979, "encoder_q-layer.2": 1518.62, "encoder_q-layer.3": 1629.5905, "encoder_q-layer.4": 1668.6498, "encoder_q-layer.5": 1826.2565, "encoder_q-layer.6": 2046.4828, "encoder_q-layer.7": 2541.7363, "encoder_q-layer.8": 2807.0183, "encoder_q-layer.9": 2392.4685, "epoch": 0.61, "inbatch_neg_score": 0.4147, "inbatch_pos_score": 1.1035, "learning_rate": 2.077777777777778e-05, "loss": 3.1335, "norm_diff": 0.0642, "norm_loss": 0.0, "num_token_doc": 66.7328, "num_token_overlap": 15.7952, "num_token_query": 42.2216, "num_token_union": 68.3816, "num_word_context": 202.2048, "num_word_doc": 49.8076, "num_word_query": 31.9029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3721.8347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4165, "query_norm": 1.4196, "queue_k_norm": 1.4869, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2216, "sent_len_1": 66.7328, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4638, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.12, "doc_norm": 1.4826, "encoder_q-embeddings": 3169.658, "encoder_q-layer.0": 2113.3936, "encoder_q-layer.1": 2275.7805, "encoder_q-layer.10": 2443.4944, "encoder_q-layer.11": 5699.5039, "encoder_q-layer.2": 2594.1235, "encoder_q-layer.3": 2843.4463, "encoder_q-layer.4": 3098.2925, "encoder_q-layer.5": 3363.3828, "encoder_q-layer.6": 3520.4458, "encoder_q-layer.7": 3492.3066, "encoder_q-layer.8": 3531.7058, "encoder_q-layer.9": 2393.3167, "epoch": 0.61, "inbatch_neg_score": 0.4139, "inbatch_pos_score": 1.084, "learning_rate": 2.0722222222222224e-05, "loss": 3.12, "norm_diff": 0.0571, "norm_loss": 0.0, "num_token_doc": 66.5667, "num_token_overlap": 15.7878, "num_token_query": 42.2948, "num_token_union": 68.3456, "num_word_context": 201.9783, "num_word_doc": 49.6806, "num_word_query": 31.9426, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4893.3574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4133, "query_norm": 1.4255, "queue_k_norm": 1.4855, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2948, "sent_len_1": 66.5667, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.0, "stdk": 0.0483, "stdq": 0.0451, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.1135, "doc_norm": 1.4905, "encoder_q-embeddings": 2386.0759, "encoder_q-layer.0": 1671.5658, "encoder_q-layer.1": 1765.4434, "encoder_q-layer.10": 2567.1355, "encoder_q-layer.11": 5778.6172, "encoder_q-layer.2": 2026.7534, "encoder_q-layer.3": 2205.2556, "encoder_q-layer.4": 2361.7451, "encoder_q-layer.5": 2297.2422, "encoder_q-layer.6": 2522.7212, "encoder_q-layer.7": 2460.4592, "encoder_q-layer.8": 2634.5586, "encoder_q-layer.9": 2279.3369, "epoch": 0.61, "inbatch_neg_score": 0.4117, "inbatch_pos_score": 1.0947, "learning_rate": 2.0666666666666666e-05, "loss": 3.1135, "norm_diff": 0.0622, "norm_loss": 0.0, "num_token_doc": 66.6812, "num_token_overlap": 15.8362, "num_token_query": 42.2671, "num_token_union": 68.3428, "num_word_context": 201.8558, "num_word_doc": 49.7825, "num_word_query": 31.907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4050.0645, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4114, "query_norm": 1.4283, "queue_k_norm": 1.4879, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2671, "sent_len_1": 66.6812, "sent_len_max_0": 128.0, "sent_len_max_1": 188.385, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1376, "doc_norm": 1.4867, "encoder_q-embeddings": 2466.5715, "encoder_q-layer.0": 1723.4254, "encoder_q-layer.1": 1930.3684, "encoder_q-layer.10": 2500.5894, "encoder_q-layer.11": 5801.6772, "encoder_q-layer.2": 2039.1376, "encoder_q-layer.3": 2084.845, "encoder_q-layer.4": 2147.6516, "encoder_q-layer.5": 2229.5659, "encoder_q-layer.6": 2338.7246, "encoder_q-layer.7": 2457.0825, "encoder_q-layer.8": 2766.5527, "encoder_q-layer.9": 2464.6265, "epoch": 0.61, "inbatch_neg_score": 0.4181, "inbatch_pos_score": 1.084, "learning_rate": 2.0611111111111112e-05, "loss": 3.1376, "norm_diff": 0.0552, "norm_loss": 0.0, "num_token_doc": 66.5699, "num_token_overlap": 15.7475, "num_token_query": 42.2513, "num_token_union": 68.4035, "num_word_context": 202.0562, "num_word_doc": 49.6501, "num_word_query": 31.92, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4097.8882, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4175, "query_norm": 1.4316, "queue_k_norm": 1.4877, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2513, "sent_len_1": 66.5699, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.1525, "stdk": 0.0484, "stdq": 0.0453, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1313, "doc_norm": 1.495, "encoder_q-embeddings": 2613.5171, "encoder_q-layer.0": 1768.0184, "encoder_q-layer.1": 1907.2025, "encoder_q-layer.10": 2473.7698, "encoder_q-layer.11": 5419.4092, "encoder_q-layer.2": 2242.8713, "encoder_q-layer.3": 2329.7673, "encoder_q-layer.4": 2739.2368, "encoder_q-layer.5": 2794.7688, "encoder_q-layer.6": 2804.6462, "encoder_q-layer.7": 2900.7861, "encoder_q-layer.8": 2959.5842, "encoder_q-layer.9": 2417.345, "epoch": 0.62, "inbatch_neg_score": 0.4146, "inbatch_pos_score": 1.0957, "learning_rate": 2.0555555555555555e-05, "loss": 3.1313, "norm_diff": 0.0788, "norm_loss": 0.0, "num_token_doc": 66.9099, "num_token_overlap": 15.7901, "num_token_query": 42.2664, "num_token_union": 68.5685, "num_word_context": 202.5863, "num_word_doc": 49.9231, "num_word_query": 31.9195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4292.2222, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4141, "query_norm": 1.4162, "queue_k_norm": 1.4877, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2664, "sent_len_1": 66.9099, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9988, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.1295, "doc_norm": 1.4888, "encoder_q-embeddings": 2734.2197, "encoder_q-layer.0": 1857.5814, "encoder_q-layer.1": 2099.167, "encoder_q-layer.10": 2530.3311, "encoder_q-layer.11": 5610.6411, "encoder_q-layer.2": 2314.9407, "encoder_q-layer.3": 2429.3215, "encoder_q-layer.4": 2562.4534, "encoder_q-layer.5": 2711.042, "encoder_q-layer.6": 2942.7986, "encoder_q-layer.7": 2947.8145, "encoder_q-layer.8": 3046.4285, "encoder_q-layer.9": 2462.366, "epoch": 0.62, "inbatch_neg_score": 0.414, "inbatch_pos_score": 1.0742, "learning_rate": 2.05e-05, "loss": 3.1295, "norm_diff": 0.0647, "norm_loss": 0.0, "num_token_doc": 66.6835, "num_token_overlap": 15.7567, "num_token_query": 42.2918, "num_token_union": 68.4584, "num_word_context": 202.1782, "num_word_doc": 49.7721, "num_word_query": 31.9366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4407.8084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4128, "query_norm": 1.4241, "queue_k_norm": 1.4905, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2918, "sent_len_1": 66.6835, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.89, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.1325, "doc_norm": 1.4875, "encoder_q-embeddings": 2508.7622, "encoder_q-layer.0": 1701.193, "encoder_q-layer.1": 1892.324, "encoder_q-layer.10": 2488.9353, "encoder_q-layer.11": 5503.2832, "encoder_q-layer.2": 2182.3137, "encoder_q-layer.3": 2304.6736, "encoder_q-layer.4": 2613.9524, "encoder_q-layer.5": 2645.4829, "encoder_q-layer.6": 3249.4724, "encoder_q-layer.7": 3650.6433, "encoder_q-layer.8": 3922.407, "encoder_q-layer.9": 2684.7637, "epoch": 0.62, "inbatch_neg_score": 0.4171, "inbatch_pos_score": 1.0742, "learning_rate": 2.0444444444444446e-05, "loss": 3.1325, "norm_diff": 0.0827, "norm_loss": 0.0, "num_token_doc": 66.6071, "num_token_overlap": 15.7415, "num_token_query": 42.2016, "num_token_union": 68.3486, "num_word_context": 202.1106, "num_word_doc": 49.7214, "num_word_query": 31.8744, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4592.4809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4172, "query_norm": 1.4048, "queue_k_norm": 1.4902, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2016, "sent_len_1": 66.6071, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9137, "stdk": 0.0484, "stdq": 0.0443, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1315, "doc_norm": 1.4923, "encoder_q-embeddings": 4974.2329, "encoder_q-layer.0": 3325.8347, "encoder_q-layer.1": 3691.2283, "encoder_q-layer.10": 4523.1333, "encoder_q-layer.11": 10466.416, "encoder_q-layer.2": 4331.9521, "encoder_q-layer.3": 4622.9414, "encoder_q-layer.4": 4921.3179, "encoder_q-layer.5": 5573.7983, "encoder_q-layer.6": 6200.1875, "encoder_q-layer.7": 6126.9092, "encoder_q-layer.8": 6434.0098, "encoder_q-layer.9": 5002.6152, "epoch": 0.62, "inbatch_neg_score": 0.4104, "inbatch_pos_score": 1.1094, "learning_rate": 2.0388888888888892e-05, "loss": 3.1315, "norm_diff": 0.0693, "norm_loss": 0.0, "num_token_doc": 66.7064, "num_token_overlap": 15.8035, "num_token_query": 42.2275, "num_token_union": 68.3777, "num_word_context": 201.9219, "num_word_doc": 49.7489, "num_word_query": 31.8957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8546.5611, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4114, "query_norm": 1.423, "queue_k_norm": 1.4887, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2275, "sent_len_1": 66.7064, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5462, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1227, "doc_norm": 1.4926, "encoder_q-embeddings": 14395.5723, "encoder_q-layer.0": 10308.5264, "encoder_q-layer.1": 12254.96, "encoder_q-layer.10": 5335.4292, "encoder_q-layer.11": 11498.6143, "encoder_q-layer.2": 14585.542, "encoder_q-layer.3": 15543.6387, "encoder_q-layer.4": 16990.2676, "encoder_q-layer.5": 18336.6953, "encoder_q-layer.6": 16848.5195, "encoder_q-layer.7": 15091.293, "encoder_q-layer.8": 11142.9521, "encoder_q-layer.9": 6251.4932, "epoch": 0.62, "inbatch_neg_score": 0.41, "inbatch_pos_score": 1.0967, "learning_rate": 2.0333333333333334e-05, "loss": 3.1227, "norm_diff": 0.0715, "norm_loss": 0.0, "num_token_doc": 66.8077, "num_token_overlap": 15.8401, "num_token_query": 42.3543, "num_token_union": 68.5059, "num_word_context": 202.4082, "num_word_doc": 49.8739, "num_word_query": 32.0007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20339.8685, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.4099, "query_norm": 1.4211, "queue_k_norm": 1.4905, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3543, "sent_len_1": 66.8077, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.7325, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.0968, "doc_norm": 1.493, "encoder_q-embeddings": 2471.1716, "encoder_q-layer.0": 1563.8751, "encoder_q-layer.1": 1684.1918, "encoder_q-layer.10": 2627.5977, "encoder_q-layer.11": 5635.9946, "encoder_q-layer.2": 1970.2937, "encoder_q-layer.3": 2021.515, "encoder_q-layer.4": 2134.2854, "encoder_q-layer.5": 2066.1838, "encoder_q-layer.6": 2271.2346, "encoder_q-layer.7": 2396.2754, "encoder_q-layer.8": 2688.6541, "encoder_q-layer.9": 2433.9619, "epoch": 0.62, "inbatch_neg_score": 0.4117, "inbatch_pos_score": 1.1006, "learning_rate": 2.027777777777778e-05, "loss": 3.0968, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.7587, "num_token_overlap": 15.8831, "num_token_query": 42.4677, "num_token_union": 68.5012, "num_word_context": 201.9076, "num_word_doc": 49.8379, "num_word_query": 32.0689, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3994.517, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4131, "query_norm": 1.4304, "queue_k_norm": 1.4896, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4677, "sent_len_1": 66.7587, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.9013, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 52.7344, "active_queue_size": 16384.0, "cl_loss": 3.1258, "doc_norm": 1.4818, "encoder_q-embeddings": 15150.2334, "encoder_q-layer.0": 10607.3076, "encoder_q-layer.1": 12442.0811, "encoder_q-layer.10": 2835.5547, "encoder_q-layer.11": 5865.814, "encoder_q-layer.2": 15250.0537, "encoder_q-layer.3": 16901.4902, "encoder_q-layer.4": 18456.7773, "encoder_q-layer.5": 19575.2305, "encoder_q-layer.6": 17616.1504, "encoder_q-layer.7": 15375.1885, "encoder_q-layer.8": 10169.0918, "encoder_q-layer.9": 3914.5793, "epoch": 0.62, "inbatch_neg_score": 0.418, "inbatch_pos_score": 1.0537, "learning_rate": 2.0222222222222222e-05, "loss": 3.1258, "norm_diff": 0.0792, "norm_loss": 0.0, "num_token_doc": 67.0504, "num_token_overlap": 15.8627, "num_token_query": 42.333, "num_token_union": 68.5808, "num_word_context": 202.8168, "num_word_doc": 50.0312, "num_word_query": 31.9878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20170.0605, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.417, "query_norm": 1.4026, "queue_k_norm": 1.4904, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.333, "sent_len_1": 67.0504, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.08, "stdk": 0.0482, "stdq": 0.0442, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 59.2773, "active_queue_size": 16384.0, "cl_loss": 3.1205, "doc_norm": 1.4903, "encoder_q-embeddings": 4109.6187, "encoder_q-layer.0": 2931.3987, "encoder_q-layer.1": 3437.5056, "encoder_q-layer.10": 2360.7036, "encoder_q-layer.11": 5228.8086, "encoder_q-layer.2": 3766.8081, "encoder_q-layer.3": 4018.4429, "encoder_q-layer.4": 4271.1533, "encoder_q-layer.5": 4562.3838, "encoder_q-layer.6": 5024.8652, "encoder_q-layer.7": 4708.5557, "encoder_q-layer.8": 3603.9019, "encoder_q-layer.9": 2397.6602, "epoch": 0.62, "inbatch_neg_score": 0.415, "inbatch_pos_score": 1.1191, "learning_rate": 2.0166666666666668e-05, "loss": 3.1205, "norm_diff": 0.0531, "norm_loss": 0.0, "num_token_doc": 66.7641, "num_token_overlap": 15.8187, "num_token_query": 42.3066, "num_token_union": 68.4724, "num_word_context": 202.2269, "num_word_doc": 49.807, "num_word_query": 31.9811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5914.6658, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4153, "query_norm": 1.4373, "queue_k_norm": 1.4893, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3066, "sent_len_1": 66.7641, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5037, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.1222, "doc_norm": 1.4829, "encoder_q-embeddings": 1207.9523, "encoder_q-layer.0": 764.1958, "encoder_q-layer.1": 840.92, "encoder_q-layer.10": 1296.8354, "encoder_q-layer.11": 2784.4316, "encoder_q-layer.2": 939.6022, "encoder_q-layer.3": 940.7244, "encoder_q-layer.4": 1021.7247, "encoder_q-layer.5": 961.1821, "encoder_q-layer.6": 1060.2784, "encoder_q-layer.7": 1228.8715, "encoder_q-layer.8": 1356.0872, "encoder_q-layer.9": 1200.0159, "epoch": 0.62, "inbatch_neg_score": 0.4174, "inbatch_pos_score": 1.1016, "learning_rate": 2.011111111111111e-05, "loss": 3.1222, "norm_diff": 0.0506, "norm_loss": 0.0, "num_token_doc": 66.613, "num_token_overlap": 15.7987, "num_token_query": 42.0967, "num_token_union": 68.2705, "num_word_context": 202.025, "num_word_doc": 49.7232, "num_word_query": 31.7872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1944.7476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4175, "query_norm": 1.4323, "queue_k_norm": 1.49, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.0967, "sent_len_1": 66.613, "sent_len_max_0": 127.98, "sent_len_max_1": 189.535, "stdk": 0.0482, "stdq": 0.0454, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 51.7578, "active_queue_size": 16384.0, "cl_loss": 3.1265, "doc_norm": 1.4892, "encoder_q-embeddings": 2570.0828, "encoder_q-layer.0": 1714.7483, "encoder_q-layer.1": 1963.5502, "encoder_q-layer.10": 1408.9926, "encoder_q-layer.11": 3162.0244, "encoder_q-layer.2": 2138.1558, "encoder_q-layer.3": 2322.4822, "encoder_q-layer.4": 2327.6287, "encoder_q-layer.5": 2188.8286, "encoder_q-layer.6": 2102.3188, "encoder_q-layer.7": 2086.5947, "encoder_q-layer.8": 2066.1875, "encoder_q-layer.9": 1455.238, "epoch": 0.62, "inbatch_neg_score": 0.4201, "inbatch_pos_score": 1.0752, "learning_rate": 2.0055555555555556e-05, "loss": 3.1265, "norm_diff": 0.0598, "norm_loss": 0.0, "num_token_doc": 66.627, "num_token_overlap": 15.7788, "num_token_query": 42.2959, "num_token_union": 68.4789, "num_word_context": 202.5559, "num_word_doc": 49.7778, "num_word_query": 31.9311, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3254.5165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4194, "query_norm": 1.4294, "queue_k_norm": 1.4912, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2959, "sent_len_1": 66.627, "sent_len_max_0": 128.0, "sent_len_max_1": 187.225, "stdk": 0.0484, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0905, "doc_norm": 1.4981, "encoder_q-embeddings": 2026.759, "encoder_q-layer.0": 1407.7035, "encoder_q-layer.1": 1631.6273, "encoder_q-layer.10": 1231.1234, "encoder_q-layer.11": 2840.136, "encoder_q-layer.2": 1897.5186, "encoder_q-layer.3": 1964.4144, "encoder_q-layer.4": 2133.0444, "encoder_q-layer.5": 2170.8328, "encoder_q-layer.6": 2175.6541, "encoder_q-layer.7": 2136.5833, "encoder_q-layer.8": 1934.9023, "encoder_q-layer.9": 1259.0062, "epoch": 0.62, "inbatch_neg_score": 0.4232, "inbatch_pos_score": 1.1123, "learning_rate": 2e-05, "loss": 3.0905, "norm_diff": 0.0664, "norm_loss": 0.0, "num_token_doc": 66.7797, "num_token_overlap": 15.8298, "num_token_query": 42.4533, "num_token_union": 68.4911, "num_word_context": 202.2963, "num_word_doc": 49.8005, "num_word_query": 32.0795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2930.8281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4231, "query_norm": 1.4317, "queue_k_norm": 1.4906, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4533, "sent_len_1": 66.7797, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.785, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1173, "doc_norm": 1.4934, "encoder_q-embeddings": 1578.354, "encoder_q-layer.0": 1102.5095, "encoder_q-layer.1": 1264.4448, "encoder_q-layer.10": 1232.6671, "encoder_q-layer.11": 2805.7668, "encoder_q-layer.2": 1456.2854, "encoder_q-layer.3": 1478.6853, "encoder_q-layer.4": 1586.5957, "encoder_q-layer.5": 1664.4854, "encoder_q-layer.6": 1665.3331, "encoder_q-layer.7": 1726.5682, "encoder_q-layer.8": 1538.2684, "encoder_q-layer.9": 1256.4092, "epoch": 0.63, "inbatch_neg_score": 0.4223, "inbatch_pos_score": 1.0898, "learning_rate": 1.9944444444444447e-05, "loss": 3.1173, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 66.8959, "num_token_overlap": 15.7377, "num_token_query": 42.1486, "num_token_union": 68.5673, "num_word_context": 202.3738, "num_word_doc": 49.9228, "num_word_query": 31.8021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2431.4114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4221, "query_norm": 1.4211, "queue_k_norm": 1.4919, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1486, "sent_len_1": 66.8959, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.9187, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1128, "doc_norm": 1.4955, "encoder_q-embeddings": 1779.4409, "encoder_q-layer.0": 1149.7668, "encoder_q-layer.1": 1375.1406, "encoder_q-layer.10": 1263.7982, "encoder_q-layer.11": 2946.696, "encoder_q-layer.2": 1464.9424, "encoder_q-layer.3": 1360.2817, "encoder_q-layer.4": 1499.0391, "encoder_q-layer.5": 1412.9941, "encoder_q-layer.6": 1440.373, "encoder_q-layer.7": 1359.7633, "encoder_q-layer.8": 1492.4343, "encoder_q-layer.9": 1230.2306, "epoch": 0.63, "inbatch_neg_score": 0.4243, "inbatch_pos_score": 1.1025, "learning_rate": 1.988888888888889e-05, "loss": 3.1128, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 66.614, "num_token_overlap": 15.8068, "num_token_query": 42.2955, "num_token_union": 68.3945, "num_word_context": 202.0955, "num_word_doc": 49.7086, "num_word_query": 31.9645, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2393.2466, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4246, "query_norm": 1.4231, "queue_k_norm": 1.4938, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2955, "sent_len_1": 66.614, "sent_len_max_0": 127.995, "sent_len_max_1": 190.3525, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 3.1119, "doc_norm": 1.4951, "encoder_q-embeddings": 511.7678, "encoder_q-layer.0": 347.604, "encoder_q-layer.1": 358.9576, "encoder_q-layer.10": 595.4731, "encoder_q-layer.11": 1322.124, "encoder_q-layer.2": 410.8705, "encoder_q-layer.3": 423.2193, "encoder_q-layer.4": 463.2871, "encoder_q-layer.5": 478.2468, "encoder_q-layer.6": 531.7277, "encoder_q-layer.7": 565.372, "encoder_q-layer.8": 655.6404, "encoder_q-layer.9": 607.0265, "epoch": 0.63, "inbatch_neg_score": 0.4221, "inbatch_pos_score": 1.1123, "learning_rate": 1.9833333333333335e-05, "loss": 3.1119, "norm_diff": 0.081, "norm_loss": 0.0, "num_token_doc": 66.7648, "num_token_overlap": 15.8665, "num_token_query": 42.3149, "num_token_union": 68.4357, "num_word_context": 202.1384, "num_word_doc": 49.859, "num_word_query": 31.9858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 915.6587, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4219, "query_norm": 1.4142, "queue_k_norm": 1.4928, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3149, "sent_len_1": 66.7648, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5387, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.1082, "doc_norm": 1.4939, "encoder_q-embeddings": 659.0377, "encoder_q-layer.0": 451.8174, "encoder_q-layer.1": 515.5441, "encoder_q-layer.10": 637.624, "encoder_q-layer.11": 1334.333, "encoder_q-layer.2": 581.7409, "encoder_q-layer.3": 581.3382, "encoder_q-layer.4": 614.4222, "encoder_q-layer.5": 615.3805, "encoder_q-layer.6": 678.5844, "encoder_q-layer.7": 720.1456, "encoder_q-layer.8": 737.2244, "encoder_q-layer.9": 624.9421, "epoch": 0.63, "inbatch_neg_score": 0.4236, "inbatch_pos_score": 1.1162, "learning_rate": 1.9777777777777778e-05, "loss": 3.1082, "norm_diff": 0.0624, "norm_loss": 0.0, "num_token_doc": 66.9573, "num_token_overlap": 15.8901, "num_token_query": 42.3776, "num_token_union": 68.5709, "num_word_context": 202.8663, "num_word_doc": 49.9941, "num_word_query": 32.0, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1051.8205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4226, "query_norm": 1.4315, "queue_k_norm": 1.4934, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3776, "sent_len_1": 66.9573, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2912, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.1091, "doc_norm": 1.4956, "encoder_q-embeddings": 1169.0519, "encoder_q-layer.0": 822.0148, "encoder_q-layer.1": 880.5168, "encoder_q-layer.10": 594.079, "encoder_q-layer.11": 1380.4863, "encoder_q-layer.2": 1104.5225, "encoder_q-layer.3": 1243.9268, "encoder_q-layer.4": 1393.8401, "encoder_q-layer.5": 1478.803, "encoder_q-layer.6": 1461.4407, "encoder_q-layer.7": 1127.9313, "encoder_q-layer.8": 901.134, "encoder_q-layer.9": 617.5362, "epoch": 0.63, "inbatch_neg_score": 0.4233, "inbatch_pos_score": 1.1094, "learning_rate": 1.9722222222222224e-05, "loss": 3.1091, "norm_diff": 0.0593, "norm_loss": 0.0, "num_token_doc": 66.7815, "num_token_overlap": 15.8487, "num_token_query": 42.3403, "num_token_union": 68.4614, "num_word_context": 202.4249, "num_word_doc": 49.8811, "num_word_query": 32.0047, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1668.3355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4226, "query_norm": 1.4364, "queue_k_norm": 1.4942, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3403, "sent_len_1": 66.7815, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.2625, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.0971, "doc_norm": 1.4917, "encoder_q-embeddings": 736.7959, "encoder_q-layer.0": 484.496, "encoder_q-layer.1": 553.2607, "encoder_q-layer.10": 566.5687, "encoder_q-layer.11": 1306.7814, "encoder_q-layer.2": 655.3638, "encoder_q-layer.3": 667.3839, "encoder_q-layer.4": 726.3568, "encoder_q-layer.5": 760.9174, "encoder_q-layer.6": 802.7264, "encoder_q-layer.7": 793.4769, "encoder_q-layer.8": 831.5066, "encoder_q-layer.9": 632.4556, "epoch": 0.63, "inbatch_neg_score": 0.4235, "inbatch_pos_score": 1.0869, "learning_rate": 1.9666666666666666e-05, "loss": 3.0971, "norm_diff": 0.0682, "norm_loss": 0.0, "num_token_doc": 66.9448, "num_token_overlap": 15.9053, "num_token_query": 42.5032, "num_token_union": 68.6217, "num_word_context": 202.4009, "num_word_doc": 49.9611, "num_word_query": 32.107, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1141.6681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4243, "query_norm": 1.4235, "queue_k_norm": 1.4951, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5032, "sent_len_1": 66.9448, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8162, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 59.5703, "active_queue_size": 16384.0, "cl_loss": 3.1213, "doc_norm": 1.4909, "encoder_q-embeddings": 631.6816, "encoder_q-layer.0": 421.1059, "encoder_q-layer.1": 458.2871, "encoder_q-layer.10": 626.0532, "encoder_q-layer.11": 1347.8433, "encoder_q-layer.2": 523.1439, "encoder_q-layer.3": 554.4349, "encoder_q-layer.4": 597.0929, "encoder_q-layer.5": 631.0473, "encoder_q-layer.6": 662.8177, "encoder_q-layer.7": 679.0388, "encoder_q-layer.8": 707.9863, "encoder_q-layer.9": 591.665, "epoch": 0.63, "inbatch_neg_score": 0.4267, "inbatch_pos_score": 1.1328, "learning_rate": 1.9611111111111115e-05, "loss": 3.1213, "norm_diff": 0.0417, "norm_loss": 0.0, "num_token_doc": 66.8438, "num_token_overlap": 15.765, "num_token_query": 42.334, "num_token_union": 68.588, "num_word_context": 202.5164, "num_word_doc": 49.863, "num_word_query": 31.9771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1033.3416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4258, "query_norm": 1.4493, "queue_k_norm": 1.494, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.334, "sent_len_1": 66.8438, "sent_len_max_0": 128.0, "sent_len_max_1": 189.19, "stdk": 0.0484, "stdq": 0.0457, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1013, "doc_norm": 1.4981, "encoder_q-embeddings": 625.8788, "encoder_q-layer.0": 417.1626, "encoder_q-layer.1": 487.101, "encoder_q-layer.10": 607.4105, "encoder_q-layer.11": 1401.1543, "encoder_q-layer.2": 558.0753, "encoder_q-layer.3": 574.1061, "encoder_q-layer.4": 667.6901, "encoder_q-layer.5": 689.2069, "encoder_q-layer.6": 726.0471, "encoder_q-layer.7": 661.1168, "encoder_q-layer.8": 667.3721, "encoder_q-layer.9": 577.5941, "epoch": 0.63, "inbatch_neg_score": 0.4341, "inbatch_pos_score": 1.1064, "learning_rate": 1.9555555555555557e-05, "loss": 3.1013, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.8598, "num_token_overlap": 15.818, "num_token_query": 42.3101, "num_token_union": 68.4878, "num_word_context": 202.2724, "num_word_doc": 49.8883, "num_word_query": 31.9705, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1062.0893, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4331, "query_norm": 1.4268, "queue_k_norm": 1.4937, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3101, "sent_len_1": 66.8598, "sent_len_max_0": 127.9875, "sent_len_max_1": 191.535, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.1146, "doc_norm": 1.4959, "encoder_q-embeddings": 664.3619, "encoder_q-layer.0": 513.0373, "encoder_q-layer.1": 558.1069, "encoder_q-layer.10": 541.3428, "encoder_q-layer.11": 1355.0983, "encoder_q-layer.2": 629.7183, "encoder_q-layer.3": 619.8199, "encoder_q-layer.4": 622.4481, "encoder_q-layer.5": 598.4481, "encoder_q-layer.6": 678.8785, "encoder_q-layer.7": 673.6323, "encoder_q-layer.8": 656.9552, "encoder_q-layer.9": 575.633, "epoch": 0.63, "inbatch_neg_score": 0.4391, "inbatch_pos_score": 1.1279, "learning_rate": 1.9500000000000003e-05, "loss": 3.1146, "norm_diff": 0.0534, "norm_loss": 0.0, "num_token_doc": 66.6546, "num_token_overlap": 15.8334, "num_token_query": 42.1856, "num_token_union": 68.3096, "num_word_context": 202.3041, "num_word_doc": 49.7072, "num_word_query": 31.8559, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1061.8205, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4399, "query_norm": 1.4425, "queue_k_norm": 1.4959, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1856, "sent_len_1": 66.6546, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.465, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1293, "doc_norm": 1.495, "encoder_q-embeddings": 4598.4155, "encoder_q-layer.0": 3198.6152, "encoder_q-layer.1": 3445.9878, "encoder_q-layer.10": 639.0722, "encoder_q-layer.11": 1342.6263, "encoder_q-layer.2": 4239.3892, "encoder_q-layer.3": 3853.5781, "encoder_q-layer.4": 3435.0383, "encoder_q-layer.5": 3383.9019, "encoder_q-layer.6": 2822.8586, "encoder_q-layer.7": 2267.3469, "encoder_q-layer.8": 1776.9055, "encoder_q-layer.9": 929.4469, "epoch": 0.63, "inbatch_neg_score": 0.4413, "inbatch_pos_score": 1.1289, "learning_rate": 1.9444444444444445e-05, "loss": 3.1293, "norm_diff": 0.0583, "norm_loss": 0.0, "num_token_doc": 66.8396, "num_token_overlap": 15.7607, "num_token_query": 42.3033, "num_token_union": 68.5764, "num_word_context": 202.5753, "num_word_doc": 49.8906, "num_word_query": 31.9604, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4651.4359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4404, "query_norm": 1.4367, "queue_k_norm": 1.4971, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3033, "sent_len_1": 66.8396, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.1175, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.1232, "doc_norm": 1.4981, "encoder_q-embeddings": 1073.7002, "encoder_q-layer.0": 828.874, "encoder_q-layer.1": 874.5768, "encoder_q-layer.10": 592.199, "encoder_q-layer.11": 1391.9297, "encoder_q-layer.2": 947.8571, "encoder_q-layer.3": 1014.2306, "encoder_q-layer.4": 1107.8137, "encoder_q-layer.5": 1109.6958, "encoder_q-layer.6": 1261.6361, "encoder_q-layer.7": 1185.9927, "encoder_q-layer.8": 1056.4999, "encoder_q-layer.9": 712.9183, "epoch": 0.64, "inbatch_neg_score": 0.4436, "inbatch_pos_score": 1.1172, "learning_rate": 1.938888888888889e-05, "loss": 3.1232, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.8308, "num_token_overlap": 15.808, "num_token_query": 42.3018, "num_token_union": 68.5517, "num_word_context": 202.3553, "num_word_doc": 49.8951, "num_word_query": 31.9632, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1543.2392, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4434, "query_norm": 1.4464, "queue_k_norm": 1.4966, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3018, "sent_len_1": 66.8308, "sent_len_max_0": 127.99, "sent_len_max_1": 188.2537, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.1132, "doc_norm": 1.5008, "encoder_q-embeddings": 602.5251, "encoder_q-layer.0": 404.4452, "encoder_q-layer.1": 429.7744, "encoder_q-layer.10": 575.2767, "encoder_q-layer.11": 1336.2637, "encoder_q-layer.2": 491.9893, "encoder_q-layer.3": 509.1837, "encoder_q-layer.4": 536.6706, "encoder_q-layer.5": 521.0449, "encoder_q-layer.6": 577.2601, "encoder_q-layer.7": 602.5534, "encoder_q-layer.8": 662.3735, "encoder_q-layer.9": 579.5715, "epoch": 0.64, "inbatch_neg_score": 0.4467, "inbatch_pos_score": 1.1348, "learning_rate": 1.9333333333333333e-05, "loss": 3.1132, "norm_diff": 0.0498, "norm_loss": 0.0, "num_token_doc": 66.9217, "num_token_overlap": 15.8282, "num_token_query": 42.3674, "num_token_union": 68.5936, "num_word_context": 202.6159, "num_word_doc": 49.9388, "num_word_query": 32.0159, "postclip_grad_norm": 1.0, "preclip_grad_norm": 964.7123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4468, "query_norm": 1.451, "queue_k_norm": 1.4962, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3674, "sent_len_1": 66.9217, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6488, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1154, "doc_norm": 1.5006, "encoder_q-embeddings": 571.6704, "encoder_q-layer.0": 377.397, "encoder_q-layer.1": 406.5423, "encoder_q-layer.10": 597.9398, "encoder_q-layer.11": 1387.124, "encoder_q-layer.2": 474.9984, "encoder_q-layer.3": 479.9701, "encoder_q-layer.4": 519.8776, "encoder_q-layer.5": 529.8534, "encoder_q-layer.6": 582.8266, "encoder_q-layer.7": 599.2867, "encoder_q-layer.8": 688.9822, "encoder_q-layer.9": 588.4445, "epoch": 0.64, "inbatch_neg_score": 0.4561, "inbatch_pos_score": 1.1279, "learning_rate": 1.927777777777778e-05, "loss": 3.1154, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.6422, "num_token_overlap": 15.7871, "num_token_query": 42.1911, "num_token_union": 68.3204, "num_word_context": 201.9878, "num_word_doc": 49.6954, "num_word_query": 31.8467, "postclip_grad_norm": 1.0, "preclip_grad_norm": 979.9234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4556, "query_norm": 1.4363, "queue_k_norm": 1.4971, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1911, "sent_len_1": 66.6422, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2625, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.1051, "doc_norm": 1.4976, "encoder_q-embeddings": 783.0073, "encoder_q-layer.0": 519.5304, "encoder_q-layer.1": 585.2255, "encoder_q-layer.10": 635.3466, "encoder_q-layer.11": 1459.4109, "encoder_q-layer.2": 738.7159, "encoder_q-layer.3": 808.6796, "encoder_q-layer.4": 848.9263, "encoder_q-layer.5": 791.5366, "encoder_q-layer.6": 820.4916, "encoder_q-layer.7": 806.6111, "encoder_q-layer.8": 799.9402, "encoder_q-layer.9": 649.9539, "epoch": 0.64, "inbatch_neg_score": 0.456, "inbatch_pos_score": 1.1328, "learning_rate": 1.922222222222222e-05, "loss": 3.1051, "norm_diff": 0.0545, "norm_loss": 0.0, "num_token_doc": 66.9763, "num_token_overlap": 15.9125, "num_token_query": 42.5097, "num_token_union": 68.6319, "num_word_context": 202.5198, "num_word_doc": 49.9892, "num_word_query": 32.144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1216.6393, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4558, "query_norm": 1.443, "queue_k_norm": 1.4988, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5097, "sent_len_1": 66.9763, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4725, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0871, "doc_norm": 1.5055, "encoder_q-embeddings": 622.8406, "encoder_q-layer.0": 423.5999, "encoder_q-layer.1": 487.4711, "encoder_q-layer.10": 577.9794, "encoder_q-layer.11": 1361.097, "encoder_q-layer.2": 514.7615, "encoder_q-layer.3": 551.2228, "encoder_q-layer.4": 625.2433, "encoder_q-layer.5": 595.5591, "encoder_q-layer.6": 681.4882, "encoder_q-layer.7": 668.8358, "encoder_q-layer.8": 694.3527, "encoder_q-layer.9": 576.9667, "epoch": 0.64, "inbatch_neg_score": 0.4584, "inbatch_pos_score": 1.1504, "learning_rate": 1.9166666666666667e-05, "loss": 3.0871, "norm_diff": 0.0638, "norm_loss": 0.0, "num_token_doc": 66.8452, "num_token_overlap": 15.8404, "num_token_query": 42.2805, "num_token_union": 68.508, "num_word_context": 202.2297, "num_word_doc": 49.8639, "num_word_query": 31.9453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1031.464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.457, "query_norm": 1.4417, "queue_k_norm": 1.4999, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2805, "sent_len_1": 66.8452, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.7188, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.1008, "doc_norm": 1.5049, "encoder_q-embeddings": 572.7633, "encoder_q-layer.0": 386.9523, "encoder_q-layer.1": 431.9468, "encoder_q-layer.10": 565.7965, "encoder_q-layer.11": 1288.952, "encoder_q-layer.2": 469.0772, "encoder_q-layer.3": 475.2577, "encoder_q-layer.4": 489.8352, "encoder_q-layer.5": 489.3497, "encoder_q-layer.6": 537.1231, "encoder_q-layer.7": 577.7215, "encoder_q-layer.8": 616.9221, "encoder_q-layer.9": 541.0295, "epoch": 0.64, "inbatch_neg_score": 0.4572, "inbatch_pos_score": 1.1318, "learning_rate": 1.9111111111111113e-05, "loss": 3.1008, "norm_diff": 0.0654, "norm_loss": 0.0, "num_token_doc": 66.8294, "num_token_overlap": 15.857, "num_token_query": 42.3322, "num_token_union": 68.5079, "num_word_context": 202.423, "num_word_doc": 49.9134, "num_word_query": 32.0043, "postclip_grad_norm": 1.0, "preclip_grad_norm": 922.2969, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4578, "query_norm": 1.4396, "queue_k_norm": 1.4999, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3322, "sent_len_1": 66.8294, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.7612, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.1089, "doc_norm": 1.5008, "encoder_q-embeddings": 689.7799, "encoder_q-layer.0": 487.5224, "encoder_q-layer.1": 563.2623, "encoder_q-layer.10": 611.1412, "encoder_q-layer.11": 1413.4215, "encoder_q-layer.2": 635.7933, "encoder_q-layer.3": 669.3931, "encoder_q-layer.4": 669.4611, "encoder_q-layer.5": 608.6677, "encoder_q-layer.6": 621.4264, "encoder_q-layer.7": 623.9728, "encoder_q-layer.8": 707.9735, "encoder_q-layer.9": 609.3184, "epoch": 0.64, "inbatch_neg_score": 0.4644, "inbatch_pos_score": 1.1279, "learning_rate": 1.905555555555556e-05, "loss": 3.1089, "norm_diff": 0.0579, "norm_loss": 0.0, "num_token_doc": 66.7846, "num_token_overlap": 15.8277, "num_token_query": 42.2291, "num_token_union": 68.4307, "num_word_context": 202.003, "num_word_doc": 49.8241, "num_word_query": 31.902, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1072.4636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4629, "query_norm": 1.443, "queue_k_norm": 1.4999, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2291, "sent_len_1": 66.7846, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.6463, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.1047, "doc_norm": 1.5055, "encoder_q-embeddings": 515.5462, "encoder_q-layer.0": 331.6562, "encoder_q-layer.1": 351.6799, "encoder_q-layer.10": 581.0876, "encoder_q-layer.11": 1367.6722, "encoder_q-layer.2": 376.6343, "encoder_q-layer.3": 408.7075, "encoder_q-layer.4": 432.5271, "encoder_q-layer.5": 443.2751, "encoder_q-layer.6": 492.4932, "encoder_q-layer.7": 536.7491, "encoder_q-layer.8": 662.6262, "encoder_q-layer.9": 575.2614, "epoch": 0.64, "inbatch_neg_score": 0.4566, "inbatch_pos_score": 1.1553, "learning_rate": 1.9e-05, "loss": 3.1047, "norm_diff": 0.0793, "norm_loss": 0.0, "num_token_doc": 66.7249, "num_token_overlap": 15.8324, "num_token_query": 42.369, "num_token_union": 68.4813, "num_word_context": 202.304, "num_word_doc": 49.7867, "num_word_query": 32.0121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 923.0343, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4563, "query_norm": 1.4261, "queue_k_norm": 1.4993, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.369, "sent_len_1": 66.7249, "sent_len_max_0": 128.0, "sent_len_max_1": 192.4225, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 3.1086, "doc_norm": 1.5095, "encoder_q-embeddings": 640.9017, "encoder_q-layer.0": 439.7267, "encoder_q-layer.1": 474.9793, "encoder_q-layer.10": 618.0182, "encoder_q-layer.11": 1449.0486, "encoder_q-layer.2": 552.9206, "encoder_q-layer.3": 579.7059, "encoder_q-layer.4": 693.9724, "encoder_q-layer.5": 720.4792, "encoder_q-layer.6": 695.8893, "encoder_q-layer.7": 684.4658, "encoder_q-layer.8": 695.5156, "encoder_q-layer.9": 602.3857, "epoch": 0.64, "inbatch_neg_score": 0.4557, "inbatch_pos_score": 1.1475, "learning_rate": 1.8944444444444447e-05, "loss": 3.1086, "norm_diff": 0.0844, "norm_loss": 0.0, "num_token_doc": 66.7653, "num_token_overlap": 15.837, "num_token_query": 42.4509, "num_token_union": 68.5478, "num_word_context": 202.254, "num_word_doc": 49.8033, "num_word_query": 32.0669, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1069.2902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4556, "query_norm": 1.4251, "queue_k_norm": 1.5015, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4509, "sent_len_1": 66.7653, "sent_len_max_0": 127.985, "sent_len_max_1": 186.9512, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0958, "doc_norm": 1.5062, "encoder_q-embeddings": 933.5188, "encoder_q-layer.0": 658.7961, "encoder_q-layer.1": 725.5661, "encoder_q-layer.10": 596.6386, "encoder_q-layer.11": 1308.2986, "encoder_q-layer.2": 807.3618, "encoder_q-layer.3": 865.5926, "encoder_q-layer.4": 916.604, "encoder_q-layer.5": 924.9445, "encoder_q-layer.6": 892.713, "encoder_q-layer.7": 849.1266, "encoder_q-layer.8": 790.1003, "encoder_q-layer.9": 590.3345, "epoch": 0.64, "inbatch_neg_score": 0.4535, "inbatch_pos_score": 1.1279, "learning_rate": 1.888888888888889e-05, "loss": 3.0958, "norm_diff": 0.0796, "norm_loss": 0.0, "num_token_doc": 66.8559, "num_token_overlap": 15.8659, "num_token_query": 42.4405, "num_token_union": 68.5284, "num_word_context": 202.3066, "num_word_doc": 49.8855, "num_word_query": 32.0665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1278.7356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4519, "query_norm": 1.4266, "queue_k_norm": 1.5036, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4405, "sent_len_1": 66.8559, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9563, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.1058, "doc_norm": 1.4966, "encoder_q-embeddings": 558.4289, "encoder_q-layer.0": 361.2244, "encoder_q-layer.1": 385.4317, "encoder_q-layer.10": 769.5422, "encoder_q-layer.11": 1403.5079, "encoder_q-layer.2": 450.849, "encoder_q-layer.3": 464.7316, "encoder_q-layer.4": 479.4969, "encoder_q-layer.5": 486.6818, "encoder_q-layer.6": 550.0507, "encoder_q-layer.7": 590.0786, "encoder_q-layer.8": 657.7141, "encoder_q-layer.9": 585.0912, "epoch": 0.65, "inbatch_neg_score": 0.4557, "inbatch_pos_score": 1.1445, "learning_rate": 1.8833333333333335e-05, "loss": 3.1058, "norm_diff": 0.0588, "norm_loss": 0.0, "num_token_doc": 66.7575, "num_token_overlap": 15.7693, "num_token_query": 42.2124, "num_token_union": 68.4453, "num_word_context": 201.7299, "num_word_doc": 49.7955, "num_word_query": 31.8835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 978.5684, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4551, "query_norm": 1.4378, "queue_k_norm": 1.5034, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2124, "sent_len_1": 66.7575, "sent_len_max_0": 128.0, "sent_len_max_1": 190.36, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.1134, "doc_norm": 1.5058, "encoder_q-embeddings": 1215.1466, "encoder_q-layer.0": 906.588, "encoder_q-layer.1": 1010.9785, "encoder_q-layer.10": 606.7596, "encoder_q-layer.11": 1445.1472, "encoder_q-layer.2": 1309.0305, "encoder_q-layer.3": 1142.3624, "encoder_q-layer.4": 1075.6113, "encoder_q-layer.5": 1223.5682, "encoder_q-layer.6": 1068.3202, "encoder_q-layer.7": 984.3321, "encoder_q-layer.8": 963.1516, "encoder_q-layer.9": 723.7699, "epoch": 0.65, "inbatch_neg_score": 0.4515, "inbatch_pos_score": 1.1201, "learning_rate": 1.8777777777777777e-05, "loss": 3.1134, "norm_diff": 0.0712, "norm_loss": 0.0, "num_token_doc": 66.8064, "num_token_overlap": 15.8214, "num_token_query": 42.2003, "num_token_union": 68.3853, "num_word_context": 202.3127, "num_word_doc": 49.8344, "num_word_query": 31.8833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1637.9895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4517, "query_norm": 1.4346, "queue_k_norm": 1.504, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2003, "sent_len_1": 66.8064, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4038, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.098, "doc_norm": 1.5, "encoder_q-embeddings": 1058.8231, "encoder_q-layer.0": 676.6868, "encoder_q-layer.1": 720.409, "encoder_q-layer.10": 1257.0454, "encoder_q-layer.11": 2903.0286, "encoder_q-layer.2": 804.5236, "encoder_q-layer.3": 825.7524, "encoder_q-layer.4": 876.306, "encoder_q-layer.5": 898.5298, "encoder_q-layer.6": 960.3922, "encoder_q-layer.7": 1069.0319, "encoder_q-layer.8": 1276.6193, "encoder_q-layer.9": 1177.231, "epoch": 0.65, "inbatch_neg_score": 0.4613, "inbatch_pos_score": 1.1182, "learning_rate": 1.8722222222222223e-05, "loss": 3.098, "norm_diff": 0.0606, "norm_loss": 0.0, "num_token_doc": 66.9612, "num_token_overlap": 15.8406, "num_token_query": 42.3355, "num_token_union": 68.5799, "num_word_context": 202.6445, "num_word_doc": 49.9418, "num_word_query": 31.9981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1875.1578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4592, "query_norm": 1.4395, "queue_k_norm": 1.5053, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3355, "sent_len_1": 66.9612, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8288, "stdk": 0.0483, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.1054, "doc_norm": 1.5052, "encoder_q-embeddings": 1002.9695, "encoder_q-layer.0": 660.8326, "encoder_q-layer.1": 699.3761, "encoder_q-layer.10": 1258.5555, "encoder_q-layer.11": 2764.9163, "encoder_q-layer.2": 776.7487, "encoder_q-layer.3": 819.8915, "encoder_q-layer.4": 867.6615, "encoder_q-layer.5": 915.5161, "encoder_q-layer.6": 972.8666, "encoder_q-layer.7": 1170.9755, "encoder_q-layer.8": 1307.3864, "encoder_q-layer.9": 1146.3105, "epoch": 0.65, "inbatch_neg_score": 0.4531, "inbatch_pos_score": 1.1211, "learning_rate": 1.866666666666667e-05, "loss": 3.1054, "norm_diff": 0.0672, "norm_loss": 0.0, "num_token_doc": 66.7503, "num_token_overlap": 15.8024, "num_token_query": 42.3681, "num_token_union": 68.4913, "num_word_context": 202.4576, "num_word_doc": 49.7846, "num_word_query": 31.9811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1829.5051, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4524, "query_norm": 1.438, "queue_k_norm": 1.5044, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3681, "sent_len_1": 66.7503, "sent_len_max_0": 127.9925, "sent_len_max_1": 192.3, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1052, "doc_norm": 1.5051, "encoder_q-embeddings": 1442.126, "encoder_q-layer.0": 994.738, "encoder_q-layer.1": 1129.3663, "encoder_q-layer.10": 1284.4368, "encoder_q-layer.11": 2808.801, "encoder_q-layer.2": 1311.8516, "encoder_q-layer.3": 1405.1208, "encoder_q-layer.4": 1472.2438, "encoder_q-layer.5": 1565.7184, "encoder_q-layer.6": 1544.1974, "encoder_q-layer.7": 1504.0006, "encoder_q-layer.8": 1469.6808, "encoder_q-layer.9": 1194.7656, "epoch": 0.65, "inbatch_neg_score": 0.4532, "inbatch_pos_score": 1.1104, "learning_rate": 1.861111111111111e-05, "loss": 3.1052, "norm_diff": 0.0758, "norm_loss": 0.0, "num_token_doc": 66.9059, "num_token_overlap": 15.8692, "num_token_query": 42.3377, "num_token_union": 68.5252, "num_word_context": 202.2501, "num_word_doc": 49.9362, "num_word_query": 31.9603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2246.786, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4539, "query_norm": 1.4292, "queue_k_norm": 1.5052, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3377, "sent_len_1": 66.9059, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8212, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0977, "doc_norm": 1.5051, "encoder_q-embeddings": 915.7327, "encoder_q-layer.0": 628.1277, "encoder_q-layer.1": 669.6373, "encoder_q-layer.10": 1106.4976, "encoder_q-layer.11": 2582.0098, "encoder_q-layer.2": 770.8667, "encoder_q-layer.3": 791.9177, "encoder_q-layer.4": 816.9875, "encoder_q-layer.5": 811.821, "encoder_q-layer.6": 953.6121, "encoder_q-layer.7": 1082.7416, "encoder_q-layer.8": 1272.0309, "encoder_q-layer.9": 1069.1337, "epoch": 0.65, "inbatch_neg_score": 0.4553, "inbatch_pos_score": 1.1367, "learning_rate": 1.8555555555555557e-05, "loss": 3.0977, "norm_diff": 0.0778, "norm_loss": 0.0, "num_token_doc": 66.817, "num_token_overlap": 15.8531, "num_token_query": 42.3042, "num_token_union": 68.4101, "num_word_context": 202.286, "num_word_doc": 49.8379, "num_word_query": 31.9546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1699.133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4548, "query_norm": 1.4273, "queue_k_norm": 1.5057, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3042, "sent_len_1": 66.817, "sent_len_max_0": 128.0, "sent_len_max_1": 191.1625, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.1024, "doc_norm": 1.5054, "encoder_q-embeddings": 1973.7463, "encoder_q-layer.0": 1386.788, "encoder_q-layer.1": 1517.1924, "encoder_q-layer.10": 1164.2673, "encoder_q-layer.11": 2788.1499, "encoder_q-layer.2": 1787.1016, "encoder_q-layer.3": 1732.9691, "encoder_q-layer.4": 1393.3866, "encoder_q-layer.5": 1275.4999, "encoder_q-layer.6": 1350.3329, "encoder_q-layer.7": 1401.4219, "encoder_q-layer.8": 1457.5468, "encoder_q-layer.9": 1196.1973, "epoch": 0.65, "inbatch_neg_score": 0.4616, "inbatch_pos_score": 1.1172, "learning_rate": 1.85e-05, "loss": 3.1024, "norm_diff": 0.0817, "norm_loss": 0.0, "num_token_doc": 66.7047, "num_token_overlap": 15.7965, "num_token_query": 42.3296, "num_token_union": 68.4461, "num_word_context": 202.1719, "num_word_doc": 49.7662, "num_word_query": 31.9916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2517.098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4614, "query_norm": 1.4237, "queue_k_norm": 1.5069, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3296, "sent_len_1": 66.7047, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.7575, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.1034, "doc_norm": 1.5037, "encoder_q-embeddings": 1071.5778, "encoder_q-layer.0": 738.6709, "encoder_q-layer.1": 783.1641, "encoder_q-layer.10": 1203.1964, "encoder_q-layer.11": 2855.2856, "encoder_q-layer.2": 885.1596, "encoder_q-layer.3": 877.3627, "encoder_q-layer.4": 923.2576, "encoder_q-layer.5": 933.5081, "encoder_q-layer.6": 991.6709, "encoder_q-layer.7": 1089.3781, "encoder_q-layer.8": 1279.5319, "encoder_q-layer.9": 1117.5769, "epoch": 0.65, "inbatch_neg_score": 0.463, "inbatch_pos_score": 1.1562, "learning_rate": 1.8444444444444445e-05, "loss": 3.1034, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 67.0054, "num_token_overlap": 15.8386, "num_token_query": 42.2794, "num_token_union": 68.5563, "num_word_context": 202.8276, "num_word_doc": 49.9727, "num_word_query": 31.9307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1878.0753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4626, "query_norm": 1.4452, "queue_k_norm": 1.508, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2794, "sent_len_1": 67.0054, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.4975, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.1128, "doc_norm": 1.5062, "encoder_q-embeddings": 1473.1167, "encoder_q-layer.0": 1012.4535, "encoder_q-layer.1": 1148.5441, "encoder_q-layer.10": 1209.6104, "encoder_q-layer.11": 2674.4424, "encoder_q-layer.2": 1323.0518, "encoder_q-layer.3": 1325.6451, "encoder_q-layer.4": 1332.2753, "encoder_q-layer.5": 1261.2941, "encoder_q-layer.6": 1394.7305, "encoder_q-layer.7": 1341.1748, "encoder_q-layer.8": 1422.6709, "encoder_q-layer.9": 1195.1849, "epoch": 0.65, "inbatch_neg_score": 0.4635, "inbatch_pos_score": 1.1328, "learning_rate": 1.838888888888889e-05, "loss": 3.1128, "norm_diff": 0.0614, "norm_loss": 0.0, "num_token_doc": 66.9044, "num_token_overlap": 15.8144, "num_token_query": 42.3411, "num_token_union": 68.5563, "num_word_context": 202.4565, "num_word_doc": 49.9411, "num_word_query": 32.0214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2190.1812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4634, "query_norm": 1.4448, "queue_k_norm": 1.5069, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3411, "sent_len_1": 66.9044, "sent_len_max_0": 128.0, "sent_len_max_1": 190.155, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.1084, "doc_norm": 1.512, "encoder_q-embeddings": 1368.4344, "encoder_q-layer.0": 922.2326, "encoder_q-layer.1": 1066.8934, "encoder_q-layer.10": 1205.4004, "encoder_q-layer.11": 2733.605, "encoder_q-layer.2": 1189.9308, "encoder_q-layer.3": 1215.0459, "encoder_q-layer.4": 1251.5063, "encoder_q-layer.5": 1318.0248, "encoder_q-layer.6": 1273.4878, "encoder_q-layer.7": 1310.1743, "encoder_q-layer.8": 1360.6053, "encoder_q-layer.9": 1133.6884, "epoch": 0.65, "inbatch_neg_score": 0.4647, "inbatch_pos_score": 1.1318, "learning_rate": 1.8333333333333333e-05, "loss": 3.1084, "norm_diff": 0.0745, "norm_loss": 0.0, "num_token_doc": 66.6342, "num_token_overlap": 15.7968, "num_token_query": 42.2871, "num_token_union": 68.3769, "num_word_context": 202.1671, "num_word_doc": 49.7201, "num_word_query": 31.9492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.3376, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4641, "query_norm": 1.4375, "queue_k_norm": 1.5083, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2871, "sent_len_1": 66.6342, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.9625, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0903, "doc_norm": 1.5139, "encoder_q-embeddings": 2594.8948, "encoder_q-layer.0": 1931.4609, "encoder_q-layer.1": 2219.5461, "encoder_q-layer.10": 1259.7465, "encoder_q-layer.11": 2726.9387, "encoder_q-layer.2": 2796.1311, "encoder_q-layer.3": 3063.1375, "encoder_q-layer.4": 3476.3337, "encoder_q-layer.5": 3877.1726, "encoder_q-layer.6": 3592.228, "encoder_q-layer.7": 2804.4919, "encoder_q-layer.8": 1582.6335, "encoder_q-layer.9": 1167.3452, "epoch": 0.66, "inbatch_neg_score": 0.4681, "inbatch_pos_score": 1.1543, "learning_rate": 1.827777777777778e-05, "loss": 3.0903, "norm_diff": 0.0808, "norm_loss": 0.0, "num_token_doc": 66.9982, "num_token_overlap": 15.8477, "num_token_query": 42.3841, "num_token_union": 68.659, "num_word_context": 202.7124, "num_word_doc": 50.0008, "num_word_query": 32.0059, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3957.8795, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4683, "query_norm": 1.4331, "queue_k_norm": 1.5079, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3841, "sent_len_1": 66.9982, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.2475, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.086, "doc_norm": 1.5036, "encoder_q-embeddings": 2100.1648, "encoder_q-layer.0": 1380.5101, "encoder_q-layer.1": 1597.4922, "encoder_q-layer.10": 1228.3936, "encoder_q-layer.11": 2780.3357, "encoder_q-layer.2": 1874.5769, "encoder_q-layer.3": 1761.9264, "encoder_q-layer.4": 2045.0177, "encoder_q-layer.5": 1923.4136, "encoder_q-layer.6": 2218.8142, "encoder_q-layer.7": 2109.8264, "encoder_q-layer.8": 1728.3549, "encoder_q-layer.9": 1316.7656, "epoch": 0.66, "inbatch_neg_score": 0.4666, "inbatch_pos_score": 1.1367, "learning_rate": 1.8222222222222224e-05, "loss": 3.086, "norm_diff": 0.0685, "norm_loss": 0.0, "num_token_doc": 66.6076, "num_token_overlap": 15.8587, "num_token_query": 42.3089, "num_token_union": 68.3064, "num_word_context": 202.0421, "num_word_doc": 49.7174, "num_word_query": 31.976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2831.4542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4656, "query_norm": 1.435, "queue_k_norm": 1.5106, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3089, "sent_len_1": 66.6076, "sent_len_max_0": 127.9887, "sent_len_max_1": 186.3288, "stdk": 0.0483, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.1021, "doc_norm": 1.5126, "encoder_q-embeddings": 1348.0194, "encoder_q-layer.0": 993.5203, "encoder_q-layer.1": 1070.6648, "encoder_q-layer.10": 1233.3186, "encoder_q-layer.11": 2876.5908, "encoder_q-layer.2": 1243.4249, "encoder_q-layer.3": 1375.5265, "encoder_q-layer.4": 1462.4617, "encoder_q-layer.5": 1566.328, "encoder_q-layer.6": 1608.5793, "encoder_q-layer.7": 1483.1116, "encoder_q-layer.8": 1502.4427, "encoder_q-layer.9": 1226.66, "epoch": 0.66, "inbatch_neg_score": 0.4666, "inbatch_pos_score": 1.165, "learning_rate": 1.8166666666666667e-05, "loss": 3.1021, "norm_diff": 0.0597, "norm_loss": 0.0, "num_token_doc": 66.8079, "num_token_overlap": 15.7902, "num_token_query": 42.2388, "num_token_union": 68.4704, "num_word_context": 202.1653, "num_word_doc": 49.8161, "num_word_query": 31.8746, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2308.5293, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4666, "query_norm": 1.4529, "queue_k_norm": 1.5083, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2388, "sent_len_1": 66.8079, "sent_len_max_0": 127.995, "sent_len_max_1": 192.23, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.1039, "doc_norm": 1.5076, "encoder_q-embeddings": 988.1222, "encoder_q-layer.0": 665.0218, "encoder_q-layer.1": 702.4736, "encoder_q-layer.10": 1333.725, "encoder_q-layer.11": 2850.0391, "encoder_q-layer.2": 781.8157, "encoder_q-layer.3": 838.5613, "encoder_q-layer.4": 874.2997, "encoder_q-layer.5": 954.8613, "encoder_q-layer.6": 1050.9113, "encoder_q-layer.7": 1177.5898, "encoder_q-layer.8": 1293.0208, "encoder_q-layer.9": 1197.8177, "epoch": 0.66, "inbatch_neg_score": 0.4718, "inbatch_pos_score": 1.1533, "learning_rate": 1.8111111111111112e-05, "loss": 3.1039, "norm_diff": 0.0609, "norm_loss": 0.0, "num_token_doc": 66.6914, "num_token_overlap": 15.8075, "num_token_query": 42.3483, "num_token_union": 68.4671, "num_word_context": 202.3459, "num_word_doc": 49.7318, "num_word_query": 31.9682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1901.3183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4697, "query_norm": 1.4467, "queue_k_norm": 1.5101, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3483, "sent_len_1": 66.6914, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2625, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.0934, "doc_norm": 1.5084, "encoder_q-embeddings": 2176.5552, "encoder_q-layer.0": 1465.5336, "encoder_q-layer.1": 1602.9038, "encoder_q-layer.10": 1279.5098, "encoder_q-layer.11": 3148.1196, "encoder_q-layer.2": 1907.3094, "encoder_q-layer.3": 1941.5175, "encoder_q-layer.4": 2114.5681, "encoder_q-layer.5": 2285.8484, "encoder_q-layer.6": 2099.8655, "encoder_q-layer.7": 2169.7627, "encoder_q-layer.8": 1941.4375, "encoder_q-layer.9": 1374.2185, "epoch": 0.66, "inbatch_neg_score": 0.4719, "inbatch_pos_score": 1.1348, "learning_rate": 1.8055555555555555e-05, "loss": 3.0934, "norm_diff": 0.0583, "norm_loss": 0.0, "num_token_doc": 67.0288, "num_token_overlap": 15.8206, "num_token_query": 42.3692, "num_token_union": 68.6265, "num_word_context": 202.7009, "num_word_doc": 50.0143, "num_word_query": 31.9918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3050.6952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4722, "query_norm": 1.4501, "queue_k_norm": 1.509, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3692, "sent_len_1": 67.0288, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.6075, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 60.2539, "active_queue_size": 16384.0, "cl_loss": 3.0787, "doc_norm": 1.5129, "encoder_q-embeddings": 972.4592, "encoder_q-layer.0": 649.9488, "encoder_q-layer.1": 702.3318, "encoder_q-layer.10": 1074.7615, "encoder_q-layer.11": 2658.2324, "encoder_q-layer.2": 807.7651, "encoder_q-layer.3": 820.4854, "encoder_q-layer.4": 901.1022, "encoder_q-layer.5": 951.5662, "encoder_q-layer.6": 1024.973, "encoder_q-layer.7": 1088.3538, "encoder_q-layer.8": 1200.0869, "encoder_q-layer.9": 1085.925, "epoch": 0.66, "inbatch_neg_score": 0.4647, "inbatch_pos_score": 1.1582, "learning_rate": 1.8e-05, "loss": 3.0787, "norm_diff": 0.0708, "norm_loss": 0.0, "num_token_doc": 66.8494, "num_token_overlap": 15.9142, "num_token_query": 42.5775, "num_token_union": 68.61, "num_word_context": 202.4463, "num_word_doc": 49.8959, "num_word_query": 32.1662, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1777.8824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4646, "query_norm": 1.4421, "queue_k_norm": 1.5099, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5775, "sent_len_1": 66.8494, "sent_len_max_0": 127.9675, "sent_len_max_1": 187.945, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.0806, "doc_norm": 1.5016, "encoder_q-embeddings": 1153.0972, "encoder_q-layer.0": 773.8409, "encoder_q-layer.1": 885.1908, "encoder_q-layer.10": 1227.0629, "encoder_q-layer.11": 2740.9216, "encoder_q-layer.2": 1053.0092, "encoder_q-layer.3": 1020.5173, "encoder_q-layer.4": 1024.9683, "encoder_q-layer.5": 1073.0745, "encoder_q-layer.6": 1213.8092, "encoder_q-layer.7": 1292.5911, "encoder_q-layer.8": 1329.1084, "encoder_q-layer.9": 1204.3724, "epoch": 0.66, "inbatch_neg_score": 0.4718, "inbatch_pos_score": 1.1445, "learning_rate": 1.7944444444444443e-05, "loss": 3.0806, "norm_diff": 0.0566, "norm_loss": 0.0, "num_token_doc": 66.6068, "num_token_overlap": 15.777, "num_token_query": 42.2199, "num_token_union": 68.3891, "num_word_context": 202.217, "num_word_doc": 49.6993, "num_word_query": 31.8776, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1959.9667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4702, "query_norm": 1.445, "queue_k_norm": 1.5104, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2199, "sent_len_1": 66.6068, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5687, "stdk": 0.0482, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0907, "doc_norm": 1.5105, "encoder_q-embeddings": 1163.9285, "encoder_q-layer.0": 772.8672, "encoder_q-layer.1": 801.2816, "encoder_q-layer.10": 1268.3148, "encoder_q-layer.11": 2734.9805, "encoder_q-layer.2": 900.5405, "encoder_q-layer.3": 912.563, "encoder_q-layer.4": 1011.1685, "encoder_q-layer.5": 1040.3851, "encoder_q-layer.6": 1111.7922, "encoder_q-layer.7": 1203.7341, "encoder_q-layer.8": 1402.0649, "encoder_q-layer.9": 1159.1208, "epoch": 0.66, "inbatch_neg_score": 0.4675, "inbatch_pos_score": 1.1523, "learning_rate": 1.788888888888889e-05, "loss": 3.0907, "norm_diff": 0.0614, "norm_loss": 0.0, "num_token_doc": 66.6405, "num_token_overlap": 15.7935, "num_token_query": 42.2711, "num_token_union": 68.4259, "num_word_context": 202.2538, "num_word_doc": 49.7459, "num_word_query": 31.9399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1930.1024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4673, "query_norm": 1.4491, "queue_k_norm": 1.5112, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2711, "sent_len_1": 66.6405, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7225, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.1175, "doc_norm": 1.5086, "encoder_q-embeddings": 1157.8467, "encoder_q-layer.0": 749.8716, "encoder_q-layer.1": 834.7479, "encoder_q-layer.10": 1232.9385, "encoder_q-layer.11": 2806.948, "encoder_q-layer.2": 964.6319, "encoder_q-layer.3": 981.8973, "encoder_q-layer.4": 1004.1985, "encoder_q-layer.5": 1034.7999, "encoder_q-layer.6": 1127.1193, "encoder_q-layer.7": 1201.8459, "encoder_q-layer.8": 1410.0186, "encoder_q-layer.9": 1235.2288, "epoch": 0.66, "inbatch_neg_score": 0.4672, "inbatch_pos_score": 1.1572, "learning_rate": 1.7833333333333334e-05, "loss": 3.1175, "norm_diff": 0.0584, "norm_loss": 0.0, "num_token_doc": 66.5898, "num_token_overlap": 15.7947, "num_token_query": 42.1174, "num_token_union": 68.2741, "num_word_context": 201.6835, "num_word_doc": 49.6667, "num_word_query": 31.811, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1979.4181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4675, "query_norm": 1.4502, "queue_k_norm": 1.5109, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1174, "sent_len_1": 66.5898, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.3063, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1041, "doc_norm": 1.5095, "encoder_q-embeddings": 1155.9584, "encoder_q-layer.0": 781.2295, "encoder_q-layer.1": 873.7141, "encoder_q-layer.10": 1193.1544, "encoder_q-layer.11": 2915.5527, "encoder_q-layer.2": 972.5986, "encoder_q-layer.3": 1031.1873, "encoder_q-layer.4": 1094.3387, "encoder_q-layer.5": 1184.8079, "encoder_q-layer.6": 1284.84, "encoder_q-layer.7": 1309.606, "encoder_q-layer.8": 1564.4318, "encoder_q-layer.9": 1270.434, "epoch": 0.66, "inbatch_neg_score": 0.473, "inbatch_pos_score": 1.166, "learning_rate": 1.777777777777778e-05, "loss": 3.1041, "norm_diff": 0.0477, "norm_loss": 0.0, "num_token_doc": 66.8759, "num_token_overlap": 15.8671, "num_token_query": 42.3506, "num_token_union": 68.5063, "num_word_context": 202.1771, "num_word_doc": 49.8913, "num_word_query": 31.9889, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2086.072, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4719, "query_norm": 1.4618, "queue_k_norm": 1.5119, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3506, "sent_len_1": 66.8759, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.11, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.105, "doc_norm": 1.5114, "encoder_q-embeddings": 1933.3025, "encoder_q-layer.0": 1305.5591, "encoder_q-layer.1": 1585.4236, "encoder_q-layer.10": 1215.7535, "encoder_q-layer.11": 2708.3796, "encoder_q-layer.2": 1810.8715, "encoder_q-layer.3": 2021.7729, "encoder_q-layer.4": 2128.386, "encoder_q-layer.5": 2082.3547, "encoder_q-layer.6": 2008.1506, "encoder_q-layer.7": 1904.3719, "encoder_q-layer.8": 1729.2272, "encoder_q-layer.9": 1283.1527, "epoch": 0.66, "inbatch_neg_score": 0.4795, "inbatch_pos_score": 1.1699, "learning_rate": 1.7722222222222222e-05, "loss": 3.105, "norm_diff": 0.0539, "norm_loss": 0.0, "num_token_doc": 66.7383, "num_token_overlap": 15.793, "num_token_query": 42.2642, "num_token_union": 68.4431, "num_word_context": 202.2332, "num_word_doc": 49.7933, "num_word_query": 31.8796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2752.422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4788, "query_norm": 1.4575, "queue_k_norm": 1.5112, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2642, "sent_len_1": 66.7383, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6175, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.1045, "doc_norm": 1.5093, "encoder_q-embeddings": 1410.9318, "encoder_q-layer.0": 918.6921, "encoder_q-layer.1": 1079.1442, "encoder_q-layer.10": 1157.9713, "encoder_q-layer.11": 2713.7317, "encoder_q-layer.2": 1279.5232, "encoder_q-layer.3": 1369.2458, "encoder_q-layer.4": 1389.7784, "encoder_q-layer.5": 1353.5056, "encoder_q-layer.6": 1426.2671, "encoder_q-layer.7": 1414.8031, "encoder_q-layer.8": 1354.7698, "encoder_q-layer.9": 1146.214, "epoch": 0.67, "inbatch_neg_score": 0.4709, "inbatch_pos_score": 1.1641, "learning_rate": 1.7666666666666668e-05, "loss": 3.1045, "norm_diff": 0.0588, "norm_loss": 0.0, "num_token_doc": 66.6309, "num_token_overlap": 15.8079, "num_token_query": 42.3415, "num_token_union": 68.4226, "num_word_context": 202.0196, "num_word_doc": 49.6784, "num_word_query": 31.987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2194.5472, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4722, "query_norm": 1.4505, "queue_k_norm": 1.5098, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3415, "sent_len_1": 66.6309, "sent_len_max_0": 127.995, "sent_len_max_1": 189.48, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.086, "doc_norm": 1.5084, "encoder_q-embeddings": 2142.4351, "encoder_q-layer.0": 1397.4717, "encoder_q-layer.1": 1594.6582, "encoder_q-layer.10": 2321.2815, "encoder_q-layer.11": 5147.6216, "encoder_q-layer.2": 1753.9354, "encoder_q-layer.3": 1816.0701, "encoder_q-layer.4": 1963.1106, "encoder_q-layer.5": 2099.6411, "encoder_q-layer.6": 2222.7727, "encoder_q-layer.7": 2403.988, "encoder_q-layer.8": 2655.4185, "encoder_q-layer.9": 2405.6895, "epoch": 0.67, "inbatch_neg_score": 0.4819, "inbatch_pos_score": 1.1543, "learning_rate": 1.761111111111111e-05, "loss": 3.086, "norm_diff": 0.0472, "norm_loss": 0.0, "num_token_doc": 66.7488, "num_token_overlap": 15.7938, "num_token_query": 42.3609, "num_token_union": 68.5216, "num_word_context": 202.2326, "num_word_doc": 49.793, "num_word_query": 32.0019, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3692.1532, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4812, "query_norm": 1.4612, "queue_k_norm": 1.5133, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3609, "sent_len_1": 66.7488, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4625, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.0933, "doc_norm": 1.5136, "encoder_q-embeddings": 2126.1228, "encoder_q-layer.0": 1523.6055, "encoder_q-layer.1": 1582.3274, "encoder_q-layer.10": 2429.5039, "encoder_q-layer.11": 5536.6504, "encoder_q-layer.2": 1771.9594, "encoder_q-layer.3": 1826.2498, "encoder_q-layer.4": 1916.1578, "encoder_q-layer.5": 2113.238, "encoder_q-layer.6": 2369.8013, "encoder_q-layer.7": 2521.6853, "encoder_q-layer.8": 2766.0667, "encoder_q-layer.9": 2386.9832, "epoch": 0.67, "inbatch_neg_score": 0.4764, "inbatch_pos_score": 1.1543, "learning_rate": 1.7555555555555556e-05, "loss": 3.0933, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 67.0577, "num_token_overlap": 15.8667, "num_token_query": 42.2649, "num_token_union": 68.5312, "num_word_context": 202.6978, "num_word_doc": 50.0408, "num_word_query": 31.9041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3843.7665, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4785, "query_norm": 1.4388, "queue_k_norm": 1.5141, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2649, "sent_len_1": 67.0577, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.1037, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.1004, "doc_norm": 1.5123, "encoder_q-embeddings": 2401.6807, "encoder_q-layer.0": 1573.0612, "encoder_q-layer.1": 1665.7952, "encoder_q-layer.10": 2678.2466, "encoder_q-layer.11": 5794.5166, "encoder_q-layer.2": 1862.8625, "encoder_q-layer.3": 1836.1572, "encoder_q-layer.4": 2020.4799, "encoder_q-layer.5": 2096.2793, "encoder_q-layer.6": 2313.5396, "encoder_q-layer.7": 2367.4832, "encoder_q-layer.8": 2623.8093, "encoder_q-layer.9": 2331.3811, "epoch": 0.67, "inbatch_neg_score": 0.4807, "inbatch_pos_score": 1.1504, "learning_rate": 1.75e-05, "loss": 3.1004, "norm_diff": 0.0633, "norm_loss": 0.0, "num_token_doc": 66.5825, "num_token_overlap": 15.8316, "num_token_query": 42.4456, "num_token_union": 68.4343, "num_word_context": 202.3354, "num_word_doc": 49.6912, "num_word_query": 32.0384, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3984.9968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.481, "query_norm": 1.449, "queue_k_norm": 1.5131, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4456, "sent_len_1": 66.5825, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2025, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.0927, "doc_norm": 1.5154, "encoder_q-embeddings": 2435.0681, "encoder_q-layer.0": 1613.7924, "encoder_q-layer.1": 1829.4968, "encoder_q-layer.10": 2430.5635, "encoder_q-layer.11": 5698.2627, "encoder_q-layer.2": 2145.1814, "encoder_q-layer.3": 2246.4841, "encoder_q-layer.4": 2262.7207, "encoder_q-layer.5": 2495.9253, "encoder_q-layer.6": 2492.1465, "encoder_q-layer.7": 2695.3181, "encoder_q-layer.8": 2836.7415, "encoder_q-layer.9": 2370.5837, "epoch": 0.67, "inbatch_neg_score": 0.4906, "inbatch_pos_score": 1.1797, "learning_rate": 1.7444444444444448e-05, "loss": 3.0927, "norm_diff": 0.0604, "norm_loss": 0.0, "num_token_doc": 66.6955, "num_token_overlap": 15.8578, "num_token_query": 42.5958, "num_token_union": 68.609, "num_word_context": 202.6175, "num_word_doc": 49.7755, "num_word_query": 32.1767, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4143.2042, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4905, "query_norm": 1.455, "queue_k_norm": 1.5128, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5958, "sent_len_1": 66.6955, "sent_len_max_0": 128.0, "sent_len_max_1": 188.695, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.1168, "doc_norm": 1.509, "encoder_q-embeddings": 2275.9282, "encoder_q-layer.0": 1467.3181, "encoder_q-layer.1": 1576.1179, "encoder_q-layer.10": 2370.0303, "encoder_q-layer.11": 5596.7588, "encoder_q-layer.2": 1819.7001, "encoder_q-layer.3": 1860.1313, "encoder_q-layer.4": 1986.1162, "encoder_q-layer.5": 2008.1227, "encoder_q-layer.6": 2221.4172, "encoder_q-layer.7": 2250.929, "encoder_q-layer.8": 2762.074, "encoder_q-layer.9": 2407.052, "epoch": 0.67, "inbatch_neg_score": 0.4941, "inbatch_pos_score": 1.1602, "learning_rate": 1.738888888888889e-05, "loss": 3.1168, "norm_diff": 0.0681, "norm_loss": 0.0, "num_token_doc": 66.5861, "num_token_overlap": 15.7658, "num_token_query": 42.0766, "num_token_union": 68.2732, "num_word_context": 201.8886, "num_word_doc": 49.7216, "num_word_query": 31.7604, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3856.3739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4937, "query_norm": 1.4409, "queue_k_norm": 1.5143, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.0766, "sent_len_1": 66.5861, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.2725, "stdk": 0.0484, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.106, "doc_norm": 1.5137, "encoder_q-embeddings": 3703.5752, "encoder_q-layer.0": 2624.4009, "encoder_q-layer.1": 2817.5906, "encoder_q-layer.10": 2657.7507, "encoder_q-layer.11": 5807.3999, "encoder_q-layer.2": 3226.0002, "encoder_q-layer.3": 3169.1648, "encoder_q-layer.4": 3049.9497, "encoder_q-layer.5": 3014.5693, "encoder_q-layer.6": 3116.0549, "encoder_q-layer.7": 3210.6328, "encoder_q-layer.8": 3097.0071, "encoder_q-layer.9": 2497.5723, "epoch": 0.67, "inbatch_neg_score": 0.4949, "inbatch_pos_score": 1.1641, "learning_rate": 1.7333333333333336e-05, "loss": 3.106, "norm_diff": 0.0842, "norm_loss": 0.0, "num_token_doc": 66.8273, "num_token_overlap": 15.8234, "num_token_query": 42.268, "num_token_union": 68.441, "num_word_context": 202.1163, "num_word_doc": 49.8297, "num_word_query": 31.931, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5017.4305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4961, "query_norm": 1.4295, "queue_k_norm": 1.5142, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.268, "sent_len_1": 66.8273, "sent_len_max_0": 128.0, "sent_len_max_1": 192.345, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0881, "doc_norm": 1.514, "encoder_q-embeddings": 1875.8788, "encoder_q-layer.0": 1265.4535, "encoder_q-layer.1": 1325.3782, "encoder_q-layer.10": 2340.9746, "encoder_q-layer.11": 5647.8213, "encoder_q-layer.2": 1546.7589, "encoder_q-layer.3": 1596.0417, "encoder_q-layer.4": 1628.335, "encoder_q-layer.5": 1656.5381, "encoder_q-layer.6": 1842.8517, "encoder_q-layer.7": 2169.0916, "encoder_q-layer.8": 2475.7639, "encoder_q-layer.9": 2285.4824, "epoch": 0.67, "inbatch_neg_score": 0.4982, "inbatch_pos_score": 1.1748, "learning_rate": 1.7277777777777778e-05, "loss": 3.0881, "norm_diff": 0.0654, "norm_loss": 0.0, "num_token_doc": 66.9076, "num_token_overlap": 15.8882, "num_token_query": 42.3512, "num_token_union": 68.4943, "num_word_context": 202.2741, "num_word_doc": 49.9349, "num_word_query": 32.0045, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3634.0671, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.499, "query_norm": 1.4486, "queue_k_norm": 1.5139, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3512, "sent_len_1": 66.9076, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.8425, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0912, "doc_norm": 1.5193, "encoder_q-embeddings": 3146.3774, "encoder_q-layer.0": 2159.458, "encoder_q-layer.1": 2369.1946, "encoder_q-layer.10": 2467.3936, "encoder_q-layer.11": 5417.1357, "encoder_q-layer.2": 2713.0156, "encoder_q-layer.3": 2873.4968, "encoder_q-layer.4": 3228.1345, "encoder_q-layer.5": 3514.7202, "encoder_q-layer.6": 3638.9419, "encoder_q-layer.7": 3584.5459, "encoder_q-layer.8": 3176.7122, "encoder_q-layer.9": 2473.6389, "epoch": 0.67, "inbatch_neg_score": 0.4915, "inbatch_pos_score": 1.1758, "learning_rate": 1.7222222222222224e-05, "loss": 3.0912, "norm_diff": 0.0786, "norm_loss": 0.0, "num_token_doc": 67.0436, "num_token_overlap": 15.9144, "num_token_query": 42.3852, "num_token_union": 68.6011, "num_word_context": 202.597, "num_word_doc": 50.0541, "num_word_query": 32.0031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4867.3262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4917, "query_norm": 1.4407, "queue_k_norm": 1.5157, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3852, "sent_len_1": 67.0436, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.9313, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.1076, "doc_norm": 1.5272, "encoder_q-embeddings": 2271.2903, "encoder_q-layer.0": 1540.0988, "encoder_q-layer.1": 1743.6428, "encoder_q-layer.10": 2367.9138, "encoder_q-layer.11": 5763.2915, "encoder_q-layer.2": 2241.135, "encoder_q-layer.3": 2109.6594, "encoder_q-layer.4": 2199.6494, "encoder_q-layer.5": 2006.6031, "encoder_q-layer.6": 2277.2432, "encoder_q-layer.7": 2324.5703, "encoder_q-layer.8": 2425.2615, "encoder_q-layer.9": 2235.8933, "epoch": 0.67, "inbatch_neg_score": 0.4996, "inbatch_pos_score": 1.1602, "learning_rate": 1.7166666666666666e-05, "loss": 3.1076, "norm_diff": 0.079, "norm_loss": 0.0, "num_token_doc": 67.0071, "num_token_overlap": 15.8541, "num_token_query": 42.3691, "num_token_union": 68.6146, "num_word_context": 202.5437, "num_word_doc": 49.9836, "num_word_query": 32.0246, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3977.2236, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4995, "query_norm": 1.4482, "queue_k_norm": 1.5171, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3691, "sent_len_1": 67.0071, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6138, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.0979, "doc_norm": 1.5153, "encoder_q-embeddings": 2262.7751, "encoder_q-layer.0": 1541.7064, "encoder_q-layer.1": 1616.9814, "encoder_q-layer.10": 2707.4788, "encoder_q-layer.11": 5932.1157, "encoder_q-layer.2": 1833.2177, "encoder_q-layer.3": 1899.2219, "encoder_q-layer.4": 2040.137, "encoder_q-layer.5": 2238.1274, "encoder_q-layer.6": 2274.0876, "encoder_q-layer.7": 2595.3657, "encoder_q-layer.8": 2852.9111, "encoder_q-layer.9": 2499.4929, "epoch": 0.68, "inbatch_neg_score": 0.4976, "inbatch_pos_score": 1.1582, "learning_rate": 1.7111111111111112e-05, "loss": 3.0979, "norm_diff": 0.0671, "norm_loss": 0.0, "num_token_doc": 66.7837, "num_token_overlap": 15.846, "num_token_query": 42.4213, "num_token_union": 68.5099, "num_word_context": 202.2059, "num_word_doc": 49.874, "num_word_query": 32.04, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4023.9392, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4961, "query_norm": 1.4483, "queue_k_norm": 1.5175, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4213, "sent_len_1": 66.7837, "sent_len_max_0": 128.0, "sent_len_max_1": 186.8313, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.0958, "doc_norm": 1.5191, "encoder_q-embeddings": 2456.9829, "encoder_q-layer.0": 1649.4703, "encoder_q-layer.1": 1763.2172, "encoder_q-layer.10": 2421.8989, "encoder_q-layer.11": 5758.0312, "encoder_q-layer.2": 1978.3654, "encoder_q-layer.3": 2117.5027, "encoder_q-layer.4": 2369.2407, "encoder_q-layer.5": 2607.5818, "encoder_q-layer.6": 2662.4597, "encoder_q-layer.7": 3069.1667, "encoder_q-layer.8": 3066.5967, "encoder_q-layer.9": 2392.2017, "epoch": 0.68, "inbatch_neg_score": 0.4998, "inbatch_pos_score": 1.1699, "learning_rate": 1.7055555555555554e-05, "loss": 3.0958, "norm_diff": 0.0683, "norm_loss": 0.0, "num_token_doc": 66.8936, "num_token_overlap": 15.8556, "num_token_query": 42.4349, "num_token_union": 68.5593, "num_word_context": 202.5596, "num_word_doc": 49.8866, "num_word_query": 32.053, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4188.8381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5005, "query_norm": 1.4508, "queue_k_norm": 1.5185, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4349, "sent_len_1": 66.8936, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7175, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.093, "doc_norm": 1.5191, "encoder_q-embeddings": 1875.2334, "encoder_q-layer.0": 1197.671, "encoder_q-layer.1": 1255.1428, "encoder_q-layer.10": 2343.5518, "encoder_q-layer.11": 5737.3647, "encoder_q-layer.2": 1409.9602, "encoder_q-layer.3": 1457.0651, "encoder_q-layer.4": 1544.3562, "encoder_q-layer.5": 1595.1669, "encoder_q-layer.6": 1799.9816, "encoder_q-layer.7": 2138.0605, "encoder_q-layer.8": 2583.8408, "encoder_q-layer.9": 2334.2212, "epoch": 0.68, "inbatch_neg_score": 0.5037, "inbatch_pos_score": 1.1943, "learning_rate": 1.7000000000000003e-05, "loss": 3.093, "norm_diff": 0.0679, "norm_loss": 0.0, "num_token_doc": 66.9208, "num_token_overlap": 15.8703, "num_token_query": 42.5736, "num_token_union": 68.6732, "num_word_context": 202.3341, "num_word_doc": 49.9546, "num_word_query": 32.1813, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3640.313, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5029, "query_norm": 1.4512, "queue_k_norm": 1.5194, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5736, "sent_len_1": 66.9208, "sent_len_max_0": 128.0, "sent_len_max_1": 187.715, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.0863, "doc_norm": 1.5204, "encoder_q-embeddings": 2093.4631, "encoder_q-layer.0": 1405.6172, "encoder_q-layer.1": 1554.1907, "encoder_q-layer.10": 2764.2278, "encoder_q-layer.11": 5937.1318, "encoder_q-layer.2": 1759.2087, "encoder_q-layer.3": 1862.0732, "encoder_q-layer.4": 2074.6365, "encoder_q-layer.5": 2233.5989, "encoder_q-layer.6": 2313.2803, "encoder_q-layer.7": 2667.7654, "encoder_q-layer.8": 2869.3579, "encoder_q-layer.9": 2593.5376, "epoch": 0.68, "inbatch_neg_score": 0.4986, "inbatch_pos_score": 1.1719, "learning_rate": 1.6944444444444446e-05, "loss": 3.0863, "norm_diff": 0.0876, "norm_loss": 0.0, "num_token_doc": 66.9204, "num_token_overlap": 15.9206, "num_token_query": 42.6006, "num_token_union": 68.6694, "num_word_context": 202.7344, "num_word_doc": 49.9414, "num_word_query": 32.1967, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3975.4551, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4985, "query_norm": 1.4328, "queue_k_norm": 1.5193, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.6006, "sent_len_1": 66.9204, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.1126, "doc_norm": 1.5251, "encoder_q-embeddings": 2358.718, "encoder_q-layer.0": 1598.0743, "encoder_q-layer.1": 1672.0574, "encoder_q-layer.10": 2532.5464, "encoder_q-layer.11": 5598.3589, "encoder_q-layer.2": 1956.1526, "encoder_q-layer.3": 2015.2847, "encoder_q-layer.4": 2149.1318, "encoder_q-layer.5": 2203.3604, "encoder_q-layer.6": 2350.1306, "encoder_q-layer.7": 2501.7927, "encoder_q-layer.8": 2695.9431, "encoder_q-layer.9": 2264.1292, "epoch": 0.68, "inbatch_neg_score": 0.491, "inbatch_pos_score": 1.168, "learning_rate": 1.688888888888889e-05, "loss": 3.1126, "norm_diff": 0.0957, "norm_loss": 0.0, "num_token_doc": 66.6127, "num_token_overlap": 15.82, "num_token_query": 42.4513, "num_token_union": 68.4596, "num_word_context": 202.026, "num_word_doc": 49.6895, "num_word_query": 32.0585, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3970.7014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4934, "query_norm": 1.4294, "queue_k_norm": 1.5202, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4513, "sent_len_1": 66.6127, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.3275, "stdk": 0.0489, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 3.1079, "doc_norm": 1.5208, "encoder_q-embeddings": 2087.5811, "encoder_q-layer.0": 1363.9148, "encoder_q-layer.1": 1495.8209, "encoder_q-layer.10": 2578.9185, "encoder_q-layer.11": 5465.8657, "encoder_q-layer.2": 1691.5759, "encoder_q-layer.3": 1743.8987, "encoder_q-layer.4": 1897.8977, "encoder_q-layer.5": 1923.6249, "encoder_q-layer.6": 2249.043, "encoder_q-layer.7": 2436.4351, "encoder_q-layer.8": 2778.6184, "encoder_q-layer.9": 2379.6196, "epoch": 0.68, "inbatch_neg_score": 0.4969, "inbatch_pos_score": 1.1992, "learning_rate": 1.6833333333333334e-05, "loss": 3.1079, "norm_diff": 0.0688, "norm_loss": 0.0, "num_token_doc": 66.8029, "num_token_overlap": 15.7686, "num_token_query": 42.1739, "num_token_union": 68.4535, "num_word_context": 202.3118, "num_word_doc": 49.884, "num_word_query": 31.8708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3722.4213, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4961, "query_norm": 1.452, "queue_k_norm": 1.5191, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1739, "sent_len_1": 66.8029, "sent_len_max_0": 128.0, "sent_len_max_1": 188.34, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0953, "doc_norm": 1.5199, "encoder_q-embeddings": 2314.2168, "encoder_q-layer.0": 1491.3536, "encoder_q-layer.1": 1765.771, "encoder_q-layer.10": 2355.0876, "encoder_q-layer.11": 5436.6895, "encoder_q-layer.2": 2061.3674, "encoder_q-layer.3": 2182.333, "encoder_q-layer.4": 2403.7812, "encoder_q-layer.5": 2699.844, "encoder_q-layer.6": 3089.0422, "encoder_q-layer.7": 3209.1433, "encoder_q-layer.8": 3652.0232, "encoder_q-layer.9": 2620.0371, "epoch": 0.68, "inbatch_neg_score": 0.4946, "inbatch_pos_score": 1.1768, "learning_rate": 1.677777777777778e-05, "loss": 3.0953, "norm_diff": 0.0815, "norm_loss": 0.0, "num_token_doc": 66.8658, "num_token_overlap": 15.7475, "num_token_query": 42.3435, "num_token_union": 68.5763, "num_word_context": 202.359, "num_word_doc": 49.8858, "num_word_query": 31.9733, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4345.4543, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4941, "query_norm": 1.4385, "queue_k_norm": 1.5189, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3435, "sent_len_1": 66.8658, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5012, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 58.4961, "active_queue_size": 16384.0, "cl_loss": 3.1064, "doc_norm": 1.5264, "encoder_q-embeddings": 2961.9231, "encoder_q-layer.0": 2000.7245, "encoder_q-layer.1": 2309.7446, "encoder_q-layer.10": 2190.6372, "encoder_q-layer.11": 5313.7549, "encoder_q-layer.2": 2658.906, "encoder_q-layer.3": 2739.8093, "encoder_q-layer.4": 3044.9661, "encoder_q-layer.5": 3193.2332, "encoder_q-layer.6": 3255.8123, "encoder_q-layer.7": 3016.9324, "encoder_q-layer.8": 3071.6296, "encoder_q-layer.9": 2426.2146, "epoch": 0.68, "inbatch_neg_score": 0.497, "inbatch_pos_score": 1.2051, "learning_rate": 1.6722222222222222e-05, "loss": 3.1064, "norm_diff": 0.0826, "norm_loss": 0.0, "num_token_doc": 66.8762, "num_token_overlap": 15.7986, "num_token_query": 42.3251, "num_token_union": 68.5391, "num_word_context": 202.4604, "num_word_doc": 49.9081, "num_word_query": 31.9929, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4582.037, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4983, "query_norm": 1.4438, "queue_k_norm": 1.5194, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3251, "sent_len_1": 66.8762, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.195, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0984, "doc_norm": 1.5167, "encoder_q-embeddings": 2294.02, "encoder_q-layer.0": 1499.8849, "encoder_q-layer.1": 1579.2538, "encoder_q-layer.10": 2247.77, "encoder_q-layer.11": 5162.9287, "encoder_q-layer.2": 1856.2612, "encoder_q-layer.3": 1951.0464, "encoder_q-layer.4": 1989.0566, "encoder_q-layer.5": 2008.5881, "encoder_q-layer.6": 2160.3938, "encoder_q-layer.7": 2248.5796, "encoder_q-layer.8": 2408.2053, "encoder_q-layer.9": 2202.3462, "epoch": 0.68, "inbatch_neg_score": 0.5012, "inbatch_pos_score": 1.1865, "learning_rate": 1.6666666666666667e-05, "loss": 3.0984, "norm_diff": 0.0775, "norm_loss": 0.0, "num_token_doc": 66.6529, "num_token_overlap": 15.8, "num_token_query": 42.2173, "num_token_union": 68.3815, "num_word_context": 202.0646, "num_word_doc": 49.7743, "num_word_query": 31.8944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3697.4168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5015, "query_norm": 1.4392, "queue_k_norm": 1.5199, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2173, "sent_len_1": 66.6529, "sent_len_max_0": 127.995, "sent_len_max_1": 188.5613, "stdk": 0.0485, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 26.9519, "dev_samples_per_second": 2.375, "dev_steps_per_second": 0.037, "epoch": 0.68, "step": 70000, "test_accuracy": 93.73779296875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3524443507194519, "test_doc_norm": 1.5049092769622803, "test_inbatch_neg_score": 0.8612649440765381, "test_inbatch_pos_score": 1.7867965698242188, "test_loss": 0.3524443507194519, "test_loss_align": 0.9970858097076416, "test_loss_unif": 3.498528003692627, "test_loss_unif_q@queue": 3.498528003692627, "test_norm_diff": 0.010010289028286934, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5011191368103027, "test_query_norm": 1.5093294382095337, "test_queue_k_norm": 1.5198850631713867, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042713265866041183, "test_stdq": 0.041845474392175674, "test_stdqueue_k": 0.0486903041601181, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.9519, "dev_samples_per_second": 2.375, "dev_steps_per_second": 0.037, "epoch": 0.68, "eval_beir-arguana_ndcg@10": 0.3904, "eval_beir-arguana_recall@10": 0.65007, "eval_beir-arguana_recall@100": 0.93528, "eval_beir-arguana_recall@20": 0.78805, "eval_beir-avg_ndcg@10": 0.3781095, "eval_beir-avg_recall@10": 0.44849558333333334, "eval_beir-avg_recall@100": 0.6288388333333333, "eval_beir-avg_recall@20": 0.5093417499999999, "eval_beir-cqadupstack_ndcg@10": 0.26490499999999995, "eval_beir-cqadupstack_recall@10": 0.35851583333333337, "eval_beir-cqadupstack_recall@100": 0.5928583333333334, "eval_beir-cqadupstack_recall@20": 0.4244675, "eval_beir-fiqa_ndcg@10": 0.25077, "eval_beir-fiqa_recall@10": 0.30971, "eval_beir-fiqa_recall@100": 0.58234, "eval_beir-fiqa_recall@20": 0.38086, "eval_beir-nfcorpus_ndcg@10": 0.2899, "eval_beir-nfcorpus_recall@10": 0.13935, "eval_beir-nfcorpus_recall@100": 0.27603, "eval_beir-nfcorpus_recall@20": 0.17725, "eval_beir-nq_ndcg@10": 0.26741, "eval_beir-nq_recall@10": 0.43482, "eval_beir-nq_recall@100": 0.77692, "eval_beir-nq_recall@20": 0.55777, "eval_beir-quora_ndcg@10": 0.7734, "eval_beir-quora_recall@10": 0.88268, "eval_beir-quora_recall@100": 0.97506, "eval_beir-quora_recall@20": 0.92389, "eval_beir-scidocs_ndcg@10": 0.14989, "eval_beir-scidocs_recall@10": 0.15628, "eval_beir-scidocs_recall@100": 0.36493, "eval_beir-scidocs_recall@20": 0.21747, "eval_beir-scifact_ndcg@10": 0.64594, "eval_beir-scifact_recall@10": 0.80111, "eval_beir-scifact_recall@100": 0.91656, "eval_beir-scifact_recall@20": 0.83689, "eval_beir-trec-covid_ndcg@10": 0.55875, "eval_beir-trec-covid_recall@10": 0.612, "eval_beir-trec-covid_recall@100": 0.4538, "eval_beir-trec-covid_recall@20": 0.583, "eval_beir-webis-touche2020_ndcg@10": 0.18973, "eval_beir-webis-touche2020_recall@10": 0.14042, "eval_beir-webis-touche2020_recall@100": 0.41461, "eval_beir-webis-touche2020_recall@20": 0.20377, "eval_senteval-avg_sts": 0.7510210488751157, "eval_senteval-sickr_spearman": 0.7164704780253431, "eval_senteval-stsb_spearman": 0.7855716197248883, "step": 70000, "test_accuracy": 93.73779296875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3524443507194519, "test_doc_norm": 1.5049092769622803, "test_inbatch_neg_score": 0.8612649440765381, "test_inbatch_pos_score": 1.7867965698242188, "test_loss": 0.3524443507194519, "test_loss_align": 0.9970858097076416, "test_loss_unif": 3.498528003692627, "test_loss_unif_q@queue": 3.498528003692627, "test_norm_diff": 0.010010289028286934, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5011191368103027, "test_query_norm": 1.5093294382095337, "test_queue_k_norm": 1.5198850631713867, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042713265866041183, "test_stdq": 0.041845474392175674, "test_stdqueue_k": 0.0486903041601181, "test_stdqueue_q": 0.0 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.0959, "doc_norm": 1.5213, "encoder_q-embeddings": 2256.2043, "encoder_q-layer.0": 1517.8401, "encoder_q-layer.1": 1665.2407, "encoder_q-layer.10": 2395.2966, "encoder_q-layer.11": 5479.1055, "encoder_q-layer.2": 1951.8073, "encoder_q-layer.3": 2154.147, "encoder_q-layer.4": 2344.6687, "encoder_q-layer.5": 2434.54, "encoder_q-layer.6": 2429.4673, "encoder_q-layer.7": 2519.9783, "encoder_q-layer.8": 2810.0735, "encoder_q-layer.9": 2307.2451, "epoch": 0.68, "inbatch_neg_score": 0.4989, "inbatch_pos_score": 1.1895, "learning_rate": 1.661111111111111e-05, "loss": 3.0959, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 66.9326, "num_token_overlap": 15.8465, "num_token_query": 42.3972, "num_token_union": 68.5735, "num_word_context": 202.5218, "num_word_doc": 49.9443, "num_word_query": 32.031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3978.9943, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4985, "query_norm": 1.4443, "queue_k_norm": 1.5203, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3972, "sent_len_1": 66.9326, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0563, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0969, "doc_norm": 1.5247, "encoder_q-embeddings": 1200.9022, "encoder_q-layer.0": 829.3331, "encoder_q-layer.1": 927.679, "encoder_q-layer.10": 1151.2991, "encoder_q-layer.11": 2692.8494, "encoder_q-layer.2": 1081.0255, "encoder_q-layer.3": 1118.522, "encoder_q-layer.4": 1197.1932, "encoder_q-layer.5": 1278.3817, "encoder_q-layer.6": 1349.1519, "encoder_q-layer.7": 1394.7458, "encoder_q-layer.8": 1427.7871, "encoder_q-layer.9": 1226.9104, "epoch": 0.69, "inbatch_neg_score": 0.4935, "inbatch_pos_score": 1.2012, "learning_rate": 1.655555555555556e-05, "loss": 3.0969, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 66.9079, "num_token_overlap": 15.8196, "num_token_query": 42.3184, "num_token_union": 68.5467, "num_word_context": 202.4671, "num_word_doc": 49.9413, "num_word_query": 31.9546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2032.1403, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4944, "query_norm": 1.4478, "queue_k_norm": 1.5223, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3184, "sent_len_1": 66.9079, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.2163, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.0975, "doc_norm": 1.5251, "encoder_q-embeddings": 1071.7742, "encoder_q-layer.0": 711.4434, "encoder_q-layer.1": 748.2282, "encoder_q-layer.10": 1218.7109, "encoder_q-layer.11": 2942.2739, "encoder_q-layer.2": 862.3715, "encoder_q-layer.3": 886.1083, "encoder_q-layer.4": 940.6915, "encoder_q-layer.5": 989.9175, "encoder_q-layer.6": 1056.8756, "encoder_q-layer.7": 1163.246, "encoder_q-layer.8": 1375.3425, "encoder_q-layer.9": 1233.9856, "epoch": 0.69, "inbatch_neg_score": 0.5, "inbatch_pos_score": 1.1914, "learning_rate": 1.65e-05, "loss": 3.0975, "norm_diff": 0.0714, "norm_loss": 0.0, "num_token_doc": 66.9468, "num_token_overlap": 15.8274, "num_token_query": 42.27, "num_token_union": 68.5304, "num_word_context": 202.3558, "num_word_doc": 49.9593, "num_word_query": 31.9448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1926.4963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5005, "query_norm": 1.4537, "queue_k_norm": 1.5219, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.27, "sent_len_1": 66.9468, "sent_len_max_0": 127.975, "sent_len_max_1": 188.2837, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0848, "doc_norm": 1.5233, "encoder_q-embeddings": 1997.5957, "encoder_q-layer.0": 1431.3862, "encoder_q-layer.1": 1723.9738, "encoder_q-layer.10": 1254.375, "encoder_q-layer.11": 2959.1672, "encoder_q-layer.2": 2214.083, "encoder_q-layer.3": 2405.717, "encoder_q-layer.4": 2550.3367, "encoder_q-layer.5": 2562.1035, "encoder_q-layer.6": 2712.4893, "encoder_q-layer.7": 2449.7637, "encoder_q-layer.8": 1960.9597, "encoder_q-layer.9": 1270.67, "epoch": 0.69, "inbatch_neg_score": 0.4968, "inbatch_pos_score": 1.1719, "learning_rate": 1.6444444444444447e-05, "loss": 3.0848, "norm_diff": 0.0958, "norm_loss": 0.0, "num_token_doc": 66.6497, "num_token_overlap": 15.9152, "num_token_query": 42.5381, "num_token_union": 68.472, "num_word_context": 202.5233, "num_word_doc": 49.7071, "num_word_query": 32.1737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3201.6515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4971, "query_norm": 1.4275, "queue_k_norm": 1.522, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5381, "sent_len_1": 66.6497, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6138, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0931, "doc_norm": 1.5221, "encoder_q-embeddings": 1134.6277, "encoder_q-layer.0": 732.7596, "encoder_q-layer.1": 821.3831, "encoder_q-layer.10": 1156.6266, "encoder_q-layer.11": 2578.876, "encoder_q-layer.2": 933.9293, "encoder_q-layer.3": 1002.7548, "encoder_q-layer.4": 1088.2515, "encoder_q-layer.5": 1171.1324, "encoder_q-layer.6": 1222.8044, "encoder_q-layer.7": 1245.2667, "encoder_q-layer.8": 1335.7931, "encoder_q-layer.9": 1153.4829, "epoch": 0.69, "inbatch_neg_score": 0.4913, "inbatch_pos_score": 1.1846, "learning_rate": 1.638888888888889e-05, "loss": 3.0931, "norm_diff": 0.0899, "norm_loss": 0.0, "num_token_doc": 66.9108, "num_token_overlap": 15.9063, "num_token_query": 42.4989, "num_token_union": 68.6229, "num_word_context": 202.5403, "num_word_doc": 49.9813, "num_word_query": 32.1266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1919.2189, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4915, "query_norm": 1.4322, "queue_k_norm": 1.5215, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4989, "sent_len_1": 66.9108, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.4925, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0774, "doc_norm": 1.5216, "encoder_q-embeddings": 1513.8203, "encoder_q-layer.0": 1020.7022, "encoder_q-layer.1": 1103.6233, "encoder_q-layer.10": 1218.1609, "encoder_q-layer.11": 2846.4023, "encoder_q-layer.2": 1250.854, "encoder_q-layer.3": 1263.8154, "encoder_q-layer.4": 1323.1956, "encoder_q-layer.5": 1349.8644, "encoder_q-layer.6": 1447.0869, "encoder_q-layer.7": 1456.1388, "encoder_q-layer.8": 1520.8538, "encoder_q-layer.9": 1222.736, "epoch": 0.69, "inbatch_neg_score": 0.5002, "inbatch_pos_score": 1.1855, "learning_rate": 1.6333333333333335e-05, "loss": 3.0774, "norm_diff": 0.0642, "norm_loss": 0.0, "num_token_doc": 66.9418, "num_token_overlap": 15.8811, "num_token_query": 42.3905, "num_token_union": 68.5726, "num_word_context": 202.2656, "num_word_doc": 49.9528, "num_word_query": 32.0209, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2274.6073, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.502, "query_norm": 1.4574, "queue_k_norm": 1.5224, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3905, "sent_len_1": 66.9418, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.1488, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.0766, "doc_norm": 1.5256, "encoder_q-embeddings": 1235.2473, "encoder_q-layer.0": 828.5122, "encoder_q-layer.1": 902.9932, "encoder_q-layer.10": 1280.6326, "encoder_q-layer.11": 3024.5596, "encoder_q-layer.2": 1016.4658, "encoder_q-layer.3": 1016.0638, "encoder_q-layer.4": 1121.5165, "encoder_q-layer.5": 1152.4084, "encoder_q-layer.6": 1153.5391, "encoder_q-layer.7": 1244.3903, "encoder_q-layer.8": 1414.8256, "encoder_q-layer.9": 1208.7322, "epoch": 0.69, "inbatch_neg_score": 0.5014, "inbatch_pos_score": 1.1699, "learning_rate": 1.6277777777777777e-05, "loss": 3.0766, "norm_diff": 0.083, "norm_loss": 0.0, "num_token_doc": 66.9958, "num_token_overlap": 15.8788, "num_token_query": 42.374, "num_token_union": 68.6315, "num_word_context": 202.5141, "num_word_doc": 49.9644, "num_word_query": 32.0036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2114.9463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.501, "query_norm": 1.4426, "queue_k_norm": 1.523, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.374, "sent_len_1": 66.9958, "sent_len_max_0": 127.995, "sent_len_max_1": 187.2812, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 58.7891, "active_queue_size": 16384.0, "cl_loss": 3.0884, "doc_norm": 1.5248, "encoder_q-embeddings": 1074.6848, "encoder_q-layer.0": 728.181, "encoder_q-layer.1": 755.569, "encoder_q-layer.10": 1258.2, "encoder_q-layer.11": 2797.6379, "encoder_q-layer.2": 866.7516, "encoder_q-layer.3": 915.2388, "encoder_q-layer.4": 974.2092, "encoder_q-layer.5": 984.3458, "encoder_q-layer.6": 1100.353, "encoder_q-layer.7": 1197.1691, "encoder_q-layer.8": 1360.7036, "encoder_q-layer.9": 1219.4817, "epoch": 0.69, "inbatch_neg_score": 0.4973, "inbatch_pos_score": 1.21, "learning_rate": 1.6222222222222223e-05, "loss": 3.0884, "norm_diff": 0.0701, "norm_loss": 0.0, "num_token_doc": 66.9705, "num_token_overlap": 15.9089, "num_token_query": 42.504, "num_token_union": 68.5928, "num_word_context": 202.547, "num_word_doc": 49.9732, "num_word_query": 32.1372, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1905.7436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4973, "query_norm": 1.4546, "queue_k_norm": 1.5238, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.504, "sent_len_1": 66.9705, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9575, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0969, "doc_norm": 1.527, "encoder_q-embeddings": 995.5873, "encoder_q-layer.0": 648.3095, "encoder_q-layer.1": 686.0454, "encoder_q-layer.10": 1239.9104, "encoder_q-layer.11": 2850.667, "encoder_q-layer.2": 751.8356, "encoder_q-layer.3": 795.2762, "encoder_q-layer.4": 860.8171, "encoder_q-layer.5": 881.5502, "encoder_q-layer.6": 1010.7728, "encoder_q-layer.7": 1109.1133, "encoder_q-layer.8": 1281.0499, "encoder_q-layer.9": 1241.1163, "epoch": 0.69, "inbatch_neg_score": 0.4991, "inbatch_pos_score": 1.1924, "learning_rate": 1.6166666666666665e-05, "loss": 3.0969, "norm_diff": 0.0835, "norm_loss": 0.0, "num_token_doc": 66.6826, "num_token_overlap": 15.8164, "num_token_query": 42.3399, "num_token_union": 68.4478, "num_word_context": 202.4277, "num_word_doc": 49.7588, "num_word_query": 31.9876, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1876.2553, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5, "query_norm": 1.4435, "queue_k_norm": 1.5221, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3399, "sent_len_1": 66.6826, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.6538, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.1051, "doc_norm": 1.5251, "encoder_q-embeddings": 2166.0562, "encoder_q-layer.0": 1697.8284, "encoder_q-layer.1": 2399.3362, "encoder_q-layer.10": 1248.938, "encoder_q-layer.11": 2868.5227, "encoder_q-layer.2": 3022.7151, "encoder_q-layer.3": 2656.9685, "encoder_q-layer.4": 3010.8479, "encoder_q-layer.5": 2557.8191, "encoder_q-layer.6": 2616.7751, "encoder_q-layer.7": 2354.5098, "encoder_q-layer.8": 2017.717, "encoder_q-layer.9": 1382.1912, "epoch": 0.69, "inbatch_neg_score": 0.4995, "inbatch_pos_score": 1.1777, "learning_rate": 1.6111111111111115e-05, "loss": 3.1051, "norm_diff": 0.0861, "norm_loss": 0.0, "num_token_doc": 66.729, "num_token_overlap": 15.7409, "num_token_query": 42.1221, "num_token_union": 68.3562, "num_word_context": 202.4336, "num_word_doc": 49.7755, "num_word_query": 31.7963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3526.9184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4998, "query_norm": 1.439, "queue_k_norm": 1.5234, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1221, "sent_len_1": 66.729, "sent_len_max_0": 128.0, "sent_len_max_1": 191.9437, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.1027, "doc_norm": 1.5214, "encoder_q-embeddings": 559.9401, "encoder_q-layer.0": 388.9943, "encoder_q-layer.1": 426.6753, "encoder_q-layer.10": 640.3766, "encoder_q-layer.11": 1365.8804, "encoder_q-layer.2": 486.5059, "encoder_q-layer.3": 477.2548, "encoder_q-layer.4": 492.8566, "encoder_q-layer.5": 498.5477, "encoder_q-layer.6": 505.5878, "encoder_q-layer.7": 573.8372, "encoder_q-layer.8": 658.5676, "encoder_q-layer.9": 578.8989, "epoch": 0.69, "inbatch_neg_score": 0.4997, "inbatch_pos_score": 1.1797, "learning_rate": 1.6055555555555557e-05, "loss": 3.1027, "norm_diff": 0.0731, "norm_loss": 0.0, "num_token_doc": 66.7179, "num_token_overlap": 15.8396, "num_token_query": 42.3546, "num_token_union": 68.4172, "num_word_context": 202.0088, "num_word_doc": 49.7992, "num_word_query": 31.9976, "postclip_grad_norm": 1.0, "preclip_grad_norm": 971.3836, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4985, "query_norm": 1.4483, "queue_k_norm": 1.5226, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3546, "sent_len_1": 66.7179, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4837, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0942, "doc_norm": 1.5208, "encoder_q-embeddings": 507.2488, "encoder_q-layer.0": 333.6613, "encoder_q-layer.1": 354.9087, "encoder_q-layer.10": 631.1376, "encoder_q-layer.11": 1404.4955, "encoder_q-layer.2": 399.0287, "encoder_q-layer.3": 424.0506, "encoder_q-layer.4": 433.8353, "encoder_q-layer.5": 452.3794, "encoder_q-layer.6": 499.1201, "encoder_q-layer.7": 549.0314, "encoder_q-layer.8": 657.7062, "encoder_q-layer.9": 577.9404, "epoch": 0.7, "inbatch_neg_score": 0.4981, "inbatch_pos_score": 1.1953, "learning_rate": 1.6000000000000003e-05, "loss": 3.0942, "norm_diff": 0.0633, "norm_loss": 0.0, "num_token_doc": 66.9624, "num_token_overlap": 15.8298, "num_token_query": 42.253, "num_token_union": 68.5361, "num_word_context": 202.1977, "num_word_doc": 49.9695, "num_word_query": 31.9294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 939.4505, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4978, "query_norm": 1.4575, "queue_k_norm": 1.524, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.253, "sent_len_1": 66.9624, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0575, "stdk": 0.0485, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0952, "doc_norm": 1.5335, "encoder_q-embeddings": 565.6505, "encoder_q-layer.0": 361.2928, "encoder_q-layer.1": 390.6676, "encoder_q-layer.10": 604.7505, "encoder_q-layer.11": 1398.3108, "encoder_q-layer.2": 428.2256, "encoder_q-layer.3": 438.5283, "encoder_q-layer.4": 478.6308, "encoder_q-layer.5": 470.7042, "encoder_q-layer.6": 512.9748, "encoder_q-layer.7": 568.3475, "encoder_q-layer.8": 647.4036, "encoder_q-layer.9": 581.2493, "epoch": 0.7, "inbatch_neg_score": 0.5018, "inbatch_pos_score": 1.1943, "learning_rate": 1.5944444444444445e-05, "loss": 3.0952, "norm_diff": 0.0757, "norm_loss": 0.0, "num_token_doc": 66.8186, "num_token_overlap": 15.8081, "num_token_query": 42.2005, "num_token_union": 68.4798, "num_word_context": 202.4753, "num_word_doc": 49.8852, "num_word_query": 31.878, "postclip_grad_norm": 1.0, "preclip_grad_norm": 965.3457, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.501, "query_norm": 1.4578, "queue_k_norm": 1.5223, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2005, "sent_len_1": 66.8186, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7012, "stdk": 0.0491, "stdq": 0.0455, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0914, "doc_norm": 1.5274, "encoder_q-embeddings": 673.1108, "encoder_q-layer.0": 489.2609, "encoder_q-layer.1": 535.607, "encoder_q-layer.10": 607.2878, "encoder_q-layer.11": 1348.142, "encoder_q-layer.2": 670.7767, "encoder_q-layer.3": 592.3753, "encoder_q-layer.4": 564.6838, "encoder_q-layer.5": 573.8909, "encoder_q-layer.6": 642.4885, "encoder_q-layer.7": 627.1884, "encoder_q-layer.8": 663.2756, "encoder_q-layer.9": 584.1228, "epoch": 0.7, "inbatch_neg_score": 0.4959, "inbatch_pos_score": 1.1768, "learning_rate": 1.588888888888889e-05, "loss": 3.0914, "norm_diff": 0.0724, "norm_loss": 0.0, "num_token_doc": 67.0508, "num_token_overlap": 15.8477, "num_token_query": 42.4125, "num_token_union": 68.6933, "num_word_context": 202.7443, "num_word_doc": 49.9972, "num_word_query": 32.0124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1047.1114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4961, "query_norm": 1.455, "queue_k_norm": 1.5211, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4125, "sent_len_1": 67.0508, "sent_len_max_0": 127.99, "sent_len_max_1": 192.025, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.0972, "doc_norm": 1.5258, "encoder_q-embeddings": 569.5034, "encoder_q-layer.0": 365.637, "encoder_q-layer.1": 400.109, "encoder_q-layer.10": 606.7836, "encoder_q-layer.11": 1417.2123, "encoder_q-layer.2": 474.6187, "encoder_q-layer.3": 496.2164, "encoder_q-layer.4": 517.9194, "encoder_q-layer.5": 559.5928, "encoder_q-layer.6": 533.8649, "encoder_q-layer.7": 558.4385, "encoder_q-layer.8": 645.843, "encoder_q-layer.9": 581.785, "epoch": 0.7, "inbatch_neg_score": 0.4945, "inbatch_pos_score": 1.1553, "learning_rate": 1.5833333333333333e-05, "loss": 3.0972, "norm_diff": 0.0998, "norm_loss": 0.0, "num_token_doc": 66.8051, "num_token_overlap": 15.7984, "num_token_query": 42.2213, "num_token_union": 68.496, "num_word_context": 202.067, "num_word_doc": 49.8556, "num_word_query": 31.8885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 985.2046, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4944, "query_norm": 1.4261, "queue_k_norm": 1.5233, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2213, "sent_len_1": 66.8051, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2287, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 3.1031, "doc_norm": 1.5305, "encoder_q-embeddings": 1669.1478, "encoder_q-layer.0": 1179.1403, "encoder_q-layer.1": 1366.3232, "encoder_q-layer.10": 580.1838, "encoder_q-layer.11": 1372.4331, "encoder_q-layer.2": 1565.9186, "encoder_q-layer.3": 1622.2906, "encoder_q-layer.4": 1673.7522, "encoder_q-layer.5": 1945.6182, "encoder_q-layer.6": 1592.8561, "encoder_q-layer.7": 1377.1167, "encoder_q-layer.8": 1164.979, "encoder_q-layer.9": 718.2861, "epoch": 0.7, "inbatch_neg_score": 0.4954, "inbatch_pos_score": 1.1895, "learning_rate": 1.577777777777778e-05, "loss": 3.1031, "norm_diff": 0.0943, "norm_loss": 0.0, "num_token_doc": 66.9227, "num_token_overlap": 15.8124, "num_token_query": 42.3857, "num_token_union": 68.6245, "num_word_context": 202.2352, "num_word_doc": 49.9156, "num_word_query": 31.9961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2174.8168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4963, "query_norm": 1.4362, "queue_k_norm": 1.5243, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3857, "sent_len_1": 66.9227, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.115, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0944, "doc_norm": 1.5211, "encoder_q-embeddings": 822.9879, "encoder_q-layer.0": 530.6191, "encoder_q-layer.1": 659.6848, "encoder_q-layer.10": 659.3279, "encoder_q-layer.11": 1398.6954, "encoder_q-layer.2": 814.5009, "encoder_q-layer.3": 891.3738, "encoder_q-layer.4": 1053.2235, "encoder_q-layer.5": 1159.9166, "encoder_q-layer.6": 1223.4888, "encoder_q-layer.7": 1087.2858, "encoder_q-layer.8": 923.4509, "encoder_q-layer.9": 577.1444, "epoch": 0.7, "inbatch_neg_score": 0.4901, "inbatch_pos_score": 1.1855, "learning_rate": 1.5722222222222225e-05, "loss": 3.0944, "norm_diff": 0.0859, "norm_loss": 0.0, "num_token_doc": 66.9843, "num_token_overlap": 15.8167, "num_token_query": 42.3537, "num_token_union": 68.6152, "num_word_context": 202.4936, "num_word_doc": 49.9869, "num_word_query": 31.9777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1417.664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4915, "query_norm": 1.4352, "queue_k_norm": 1.5222, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3537, "sent_len_1": 66.9843, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6637, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0914, "doc_norm": 1.5241, "encoder_q-embeddings": 856.4689, "encoder_q-layer.0": 591.0312, "encoder_q-layer.1": 677.8605, "encoder_q-layer.10": 609.9692, "encoder_q-layer.11": 1398.2441, "encoder_q-layer.2": 901.6448, "encoder_q-layer.3": 1014.1163, "encoder_q-layer.4": 1149.2595, "encoder_q-layer.5": 1296.5127, "encoder_q-layer.6": 1253.1182, "encoder_q-layer.7": 1308.4156, "encoder_q-layer.8": 1123.041, "encoder_q-layer.9": 716.9578, "epoch": 0.7, "inbatch_neg_score": 0.493, "inbatch_pos_score": 1.1758, "learning_rate": 1.5666666666666667e-05, "loss": 3.0914, "norm_diff": 0.0982, "norm_loss": 0.0, "num_token_doc": 66.7018, "num_token_overlap": 15.7505, "num_token_query": 42.119, "num_token_union": 68.383, "num_word_context": 202.2393, "num_word_doc": 49.803, "num_word_query": 31.8464, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1536.4079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4937, "query_norm": 1.4259, "queue_k_norm": 1.523, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.119, "sent_len_1": 66.7018, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2688, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.093, "doc_norm": 1.5202, "encoder_q-embeddings": 849.8725, "encoder_q-layer.0": 619.1953, "encoder_q-layer.1": 788.5154, "encoder_q-layer.10": 701.2611, "encoder_q-layer.11": 1429.0848, "encoder_q-layer.2": 1102.3717, "encoder_q-layer.3": 1090.7769, "encoder_q-layer.4": 1148.0364, "encoder_q-layer.5": 1111.8097, "encoder_q-layer.6": 1291.3153, "encoder_q-layer.7": 975.4771, "encoder_q-layer.8": 780.6603, "encoder_q-layer.9": 693.0065, "epoch": 0.7, "inbatch_neg_score": 0.4952, "inbatch_pos_score": 1.1719, "learning_rate": 1.5611111111111113e-05, "loss": 3.093, "norm_diff": 0.0561, "norm_loss": 0.0, "num_token_doc": 66.6807, "num_token_overlap": 15.7794, "num_token_query": 42.1497, "num_token_union": 68.3284, "num_word_context": 201.9573, "num_word_doc": 49.7336, "num_word_query": 31.8399, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1476.0075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4946, "query_norm": 1.4641, "queue_k_norm": 1.5228, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1497, "sent_len_1": 66.6807, "sent_len_max_0": 128.0, "sent_len_max_1": 192.1887, "stdk": 0.0485, "stdq": 0.046, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0872, "doc_norm": 1.5185, "encoder_q-embeddings": 738.2962, "encoder_q-layer.0": 481.2775, "encoder_q-layer.1": 547.2245, "encoder_q-layer.10": 642.6724, "encoder_q-layer.11": 1329.5253, "encoder_q-layer.2": 659.0143, "encoder_q-layer.3": 699.7587, "encoder_q-layer.4": 752.8373, "encoder_q-layer.5": 813.6043, "encoder_q-layer.6": 845.188, "encoder_q-layer.7": 813.3347, "encoder_q-layer.8": 762.8995, "encoder_q-layer.9": 595.6531, "epoch": 0.7, "inbatch_neg_score": 0.4936, "inbatch_pos_score": 1.1914, "learning_rate": 1.5555555555555555e-05, "loss": 3.0872, "norm_diff": 0.0691, "norm_loss": 0.0, "num_token_doc": 66.6752, "num_token_overlap": 15.8143, "num_token_query": 42.3215, "num_token_union": 68.4449, "num_word_context": 202.2403, "num_word_doc": 49.7941, "num_word_query": 31.9953, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1154.0451, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4937, "query_norm": 1.4495, "queue_k_norm": 1.5242, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3215, "sent_len_1": 66.6752, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7237, "stdk": 0.0485, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0915, "doc_norm": 1.5242, "encoder_q-embeddings": 757.9695, "encoder_q-layer.0": 516.2296, "encoder_q-layer.1": 543.3015, "encoder_q-layer.10": 617.8425, "encoder_q-layer.11": 1430.9948, "encoder_q-layer.2": 638.2324, "encoder_q-layer.3": 653.5789, "encoder_q-layer.4": 644.4385, "encoder_q-layer.5": 643.7283, "encoder_q-layer.6": 663.2571, "encoder_q-layer.7": 657.6601, "encoder_q-layer.8": 693.3421, "encoder_q-layer.9": 584.5339, "epoch": 0.7, "inbatch_neg_score": 0.4939, "inbatch_pos_score": 1.1826, "learning_rate": 1.55e-05, "loss": 3.0915, "norm_diff": 0.0769, "norm_loss": 0.0, "num_token_doc": 66.8483, "num_token_overlap": 15.8068, "num_token_query": 42.4187, "num_token_union": 68.6354, "num_word_context": 202.6971, "num_word_doc": 49.9356, "num_word_query": 32.0745, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1104.3788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4932, "query_norm": 1.4473, "queue_k_norm": 1.5235, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4187, "sent_len_1": 66.8483, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.0225, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.099, "doc_norm": 1.526, "encoder_q-embeddings": 782.3236, "encoder_q-layer.0": 539.4377, "encoder_q-layer.1": 596.3928, "encoder_q-layer.10": 652.2876, "encoder_q-layer.11": 1397.9507, "encoder_q-layer.2": 683.6221, "encoder_q-layer.3": 735.0182, "encoder_q-layer.4": 753.8239, "encoder_q-layer.5": 776.323, "encoder_q-layer.6": 824.4346, "encoder_q-layer.7": 816.483, "encoder_q-layer.8": 823.9747, "encoder_q-layer.9": 652.1942, "epoch": 0.7, "inbatch_neg_score": 0.4903, "inbatch_pos_score": 1.166, "learning_rate": 1.5444444444444446e-05, "loss": 3.099, "norm_diff": 0.0869, "norm_loss": 0.0, "num_token_doc": 66.5554, "num_token_overlap": 15.8213, "num_token_query": 42.2742, "num_token_union": 68.325, "num_word_context": 202.4736, "num_word_doc": 49.674, "num_word_query": 31.9415, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1204.6241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.491, "query_norm": 1.4391, "queue_k_norm": 1.5241, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2742, "sent_len_1": 66.5554, "sent_len_max_0": 127.995, "sent_len_max_1": 187.8487, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.0984, "doc_norm": 1.5227, "encoder_q-embeddings": 1148.6648, "encoder_q-layer.0": 713.8663, "encoder_q-layer.1": 821.8328, "encoder_q-layer.10": 670.1669, "encoder_q-layer.11": 1496.2981, "encoder_q-layer.2": 965.7121, "encoder_q-layer.3": 985.2423, "encoder_q-layer.4": 984.2844, "encoder_q-layer.5": 1027.9436, "encoder_q-layer.6": 1130.1481, "encoder_q-layer.7": 1151.012, "encoder_q-layer.8": 805.8447, "encoder_q-layer.9": 645.7009, "epoch": 0.71, "inbatch_neg_score": 0.4938, "inbatch_pos_score": 1.1582, "learning_rate": 1.538888888888889e-05, "loss": 3.0984, "norm_diff": 0.0845, "norm_loss": 0.0, "num_token_doc": 66.6235, "num_token_overlap": 15.773, "num_token_query": 42.2613, "num_token_union": 68.3494, "num_word_context": 201.9954, "num_word_doc": 49.7543, "num_word_query": 31.9555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1494.0965, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4927, "query_norm": 1.4382, "queue_k_norm": 1.5213, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2613, "sent_len_1": 66.6235, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6362, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0907, "doc_norm": 1.5267, "encoder_q-embeddings": 508.7155, "encoder_q-layer.0": 336.4676, "encoder_q-layer.1": 363.6925, "encoder_q-layer.10": 571.7928, "encoder_q-layer.11": 1304.8115, "encoder_q-layer.2": 422.7226, "encoder_q-layer.3": 446.2274, "encoder_q-layer.4": 470.6892, "encoder_q-layer.5": 532.1032, "encoder_q-layer.6": 566.4484, "encoder_q-layer.7": 646.7626, "encoder_q-layer.8": 615.4435, "encoder_q-layer.9": 537.8164, "epoch": 0.71, "inbatch_neg_score": 0.4872, "inbatch_pos_score": 1.1631, "learning_rate": 1.5333333333333334e-05, "loss": 3.0907, "norm_diff": 0.0897, "norm_loss": 0.0, "num_token_doc": 66.9327, "num_token_overlap": 15.8224, "num_token_query": 42.377, "num_token_union": 68.5765, "num_word_context": 202.1975, "num_word_doc": 49.8674, "num_word_query": 31.9922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 919.4322, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4883, "query_norm": 1.4371, "queue_k_norm": 1.5226, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.377, "sent_len_1": 66.9327, "sent_len_max_0": 128.0, "sent_len_max_1": 190.4462, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.0992, "doc_norm": 1.521, "encoder_q-embeddings": 929.0656, "encoder_q-layer.0": 634.8615, "encoder_q-layer.1": 737.5759, "encoder_q-layer.10": 754.0034, "encoder_q-layer.11": 1529.1204, "encoder_q-layer.2": 1034.0442, "encoder_q-layer.3": 902.3828, "encoder_q-layer.4": 851.2382, "encoder_q-layer.5": 777.8615, "encoder_q-layer.6": 815.2723, "encoder_q-layer.7": 733.3558, "encoder_q-layer.8": 692.9613, "encoder_q-layer.9": 597.2891, "epoch": 0.71, "inbatch_neg_score": 0.4887, "inbatch_pos_score": 1.1543, "learning_rate": 1.527777777777778e-05, "loss": 3.0992, "norm_diff": 0.0998, "norm_loss": 0.0, "num_token_doc": 66.8483, "num_token_overlap": 15.832, "num_token_query": 42.2964, "num_token_union": 68.4705, "num_word_context": 202.1915, "num_word_doc": 49.8622, "num_word_query": 31.9408, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1301.7493, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4902, "query_norm": 1.4212, "queue_k_norm": 1.5241, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2964, "sent_len_1": 66.8483, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.9087, "stdk": 0.0486, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0745, "doc_norm": 1.5209, "encoder_q-embeddings": 582.8682, "encoder_q-layer.0": 421.8965, "encoder_q-layer.1": 478.457, "encoder_q-layer.10": 638.4722, "encoder_q-layer.11": 1428.4307, "encoder_q-layer.2": 590.1326, "encoder_q-layer.3": 544.4677, "encoder_q-layer.4": 584.7062, "encoder_q-layer.5": 515.5393, "encoder_q-layer.6": 575.5359, "encoder_q-layer.7": 643.3022, "encoder_q-layer.8": 639.1224, "encoder_q-layer.9": 606.8506, "epoch": 0.71, "inbatch_neg_score": 0.4939, "inbatch_pos_score": 1.2031, "learning_rate": 1.5222222222222224e-05, "loss": 3.0745, "norm_diff": 0.0582, "norm_loss": 0.0, "num_token_doc": 66.943, "num_token_overlap": 15.8954, "num_token_query": 42.3407, "num_token_union": 68.5109, "num_word_context": 202.3218, "num_word_doc": 49.9489, "num_word_query": 31.9919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1020.5305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4922, "query_norm": 1.4627, "queue_k_norm": 1.524, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3407, "sent_len_1": 66.943, "sent_len_max_0": 127.9887, "sent_len_max_1": 188.8862, "stdk": 0.0485, "stdq": 0.0459, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.0999, "doc_norm": 1.5252, "encoder_q-embeddings": 736.2675, "encoder_q-layer.0": 487.9532, "encoder_q-layer.1": 531.1927, "encoder_q-layer.10": 671.188, "encoder_q-layer.11": 1483.7794, "encoder_q-layer.2": 654.9027, "encoder_q-layer.3": 625.1742, "encoder_q-layer.4": 694.236, "encoder_q-layer.5": 687.0692, "encoder_q-layer.6": 637.8514, "encoder_q-layer.7": 704.6658, "encoder_q-layer.8": 762.5068, "encoder_q-layer.9": 648.5323, "epoch": 0.71, "inbatch_neg_score": 0.4897, "inbatch_pos_score": 1.1611, "learning_rate": 1.5166666666666668e-05, "loss": 3.0999, "norm_diff": 0.1, "norm_loss": 0.0, "num_token_doc": 66.8772, "num_token_overlap": 15.8706, "num_token_query": 42.4014, "num_token_union": 68.5267, "num_word_context": 202.255, "num_word_doc": 49.9228, "num_word_query": 32.0258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1141.8802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4902, "query_norm": 1.4251, "queue_k_norm": 1.5227, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4014, "sent_len_1": 66.8772, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5788, "stdk": 0.0487, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0895, "doc_norm": 1.524, "encoder_q-embeddings": 1326.9086, "encoder_q-layer.0": 935.3234, "encoder_q-layer.1": 1129.8439, "encoder_q-layer.10": 646.0533, "encoder_q-layer.11": 1378.877, "encoder_q-layer.2": 1399.9138, "encoder_q-layer.3": 1550.1989, "encoder_q-layer.4": 1571.2728, "encoder_q-layer.5": 1381.4358, "encoder_q-layer.6": 1317.459, "encoder_q-layer.7": 1124.632, "encoder_q-layer.8": 796.064, "encoder_q-layer.9": 587.2838, "epoch": 0.71, "inbatch_neg_score": 0.4926, "inbatch_pos_score": 1.1826, "learning_rate": 1.5111111111111112e-05, "loss": 3.0895, "norm_diff": 0.0811, "norm_loss": 0.0, "num_token_doc": 66.5499, "num_token_overlap": 15.7939, "num_token_query": 42.2863, "num_token_union": 68.3681, "num_word_context": 202.4395, "num_word_doc": 49.6843, "num_word_query": 31.9366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1795.6525, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4922, "query_norm": 1.4429, "queue_k_norm": 1.5225, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2863, "sent_len_1": 66.5499, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2125, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.1052, "doc_norm": 1.5265, "encoder_q-embeddings": 559.7338, "encoder_q-layer.0": 396.7998, "encoder_q-layer.1": 421.7244, "encoder_q-layer.10": 622.1483, "encoder_q-layer.11": 1438.7725, "encoder_q-layer.2": 468.9248, "encoder_q-layer.3": 462.9292, "encoder_q-layer.4": 492.9715, "encoder_q-layer.5": 481.3434, "encoder_q-layer.6": 522.7451, "encoder_q-layer.7": 570.6415, "encoder_q-layer.8": 655.1582, "encoder_q-layer.9": 577.5235, "epoch": 0.71, "inbatch_neg_score": 0.4976, "inbatch_pos_score": 1.1885, "learning_rate": 1.5055555555555556e-05, "loss": 3.1052, "norm_diff": 0.0812, "norm_loss": 0.0, "num_token_doc": 66.7167, "num_token_overlap": 15.7573, "num_token_query": 42.3457, "num_token_union": 68.5257, "num_word_context": 202.217, "num_word_doc": 49.7897, "num_word_query": 31.9662, "postclip_grad_norm": 1.0, "preclip_grad_norm": 981.5425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4976, "query_norm": 1.4453, "queue_k_norm": 1.5231, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3457, "sent_len_1": 66.7167, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8187, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0765, "doc_norm": 1.5199, "encoder_q-embeddings": 597.1051, "encoder_q-layer.0": 405.0196, "encoder_q-layer.1": 438.087, "encoder_q-layer.10": 604.9365, "encoder_q-layer.11": 1417.3423, "encoder_q-layer.2": 494.6661, "encoder_q-layer.3": 512.7652, "encoder_q-layer.4": 539.8891, "encoder_q-layer.5": 571.9901, "encoder_q-layer.6": 640.1474, "encoder_q-layer.7": 648.6786, "encoder_q-layer.8": 679.8194, "encoder_q-layer.9": 587.4431, "epoch": 0.71, "inbatch_neg_score": 0.5012, "inbatch_pos_score": 1.1855, "learning_rate": 1.5e-05, "loss": 3.0765, "norm_diff": 0.0816, "norm_loss": 0.0, "num_token_doc": 66.9024, "num_token_overlap": 15.8765, "num_token_query": 42.4808, "num_token_union": 68.5556, "num_word_context": 202.4004, "num_word_doc": 49.9171, "num_word_query": 32.1005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1024.0649, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4998, "query_norm": 1.4383, "queue_k_norm": 1.5234, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4808, "sent_len_1": 66.9024, "sent_len_max_0": 128.0, "sent_len_max_1": 189.97, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.082, "doc_norm": 1.5233, "encoder_q-embeddings": 1695.8506, "encoder_q-layer.0": 1140.1719, "encoder_q-layer.1": 1425.1951, "encoder_q-layer.10": 1333.2931, "encoder_q-layer.11": 2818.9915, "encoder_q-layer.2": 1806.6501, "encoder_q-layer.3": 1902.9872, "encoder_q-layer.4": 2193.6365, "encoder_q-layer.5": 2280.0952, "encoder_q-layer.6": 2328.0317, "encoder_q-layer.7": 2038.9176, "encoder_q-layer.8": 1560.9926, "encoder_q-layer.9": 1190.4746, "epoch": 0.71, "inbatch_neg_score": 0.497, "inbatch_pos_score": 1.1611, "learning_rate": 1.4944444444444444e-05, "loss": 3.082, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 66.6779, "num_token_overlap": 15.8279, "num_token_query": 42.3251, "num_token_union": 68.4315, "num_word_context": 201.8658, "num_word_doc": 49.7353, "num_word_query": 31.9773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2787.232, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4971, "query_norm": 1.4368, "queue_k_norm": 1.5221, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3251, "sent_len_1": 66.6779, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3925, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.08, "doc_norm": 1.5179, "encoder_q-embeddings": 992.9484, "encoder_q-layer.0": 666.901, "encoder_q-layer.1": 711.5811, "encoder_q-layer.10": 1251.5157, "encoder_q-layer.11": 2949.2527, "encoder_q-layer.2": 811.9765, "encoder_q-layer.3": 870.2744, "encoder_q-layer.4": 940.7151, "encoder_q-layer.5": 976.0835, "encoder_q-layer.6": 1055.2616, "encoder_q-layer.7": 1208.8582, "encoder_q-layer.8": 1382.3143, "encoder_q-layer.9": 1215.3978, "epoch": 0.71, "inbatch_neg_score": 0.5029, "inbatch_pos_score": 1.1934, "learning_rate": 1.4888888888888888e-05, "loss": 3.08, "norm_diff": 0.0594, "norm_loss": 0.0, "num_token_doc": 66.8711, "num_token_overlap": 15.827, "num_token_query": 42.477, "num_token_union": 68.6284, "num_word_context": 202.309, "num_word_doc": 49.93, "num_word_query": 32.0957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1955.0695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.502, "query_norm": 1.4584, "queue_k_norm": 1.5235, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.477, "sent_len_1": 66.8711, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1075, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.0752, "doc_norm": 1.5251, "encoder_q-embeddings": 1777.7616, "encoder_q-layer.0": 1237.6523, "encoder_q-layer.1": 1376.3706, "encoder_q-layer.10": 1210.1826, "encoder_q-layer.11": 2803.1694, "encoder_q-layer.2": 1518.5045, "encoder_q-layer.3": 1564.3518, "encoder_q-layer.4": 1640.6564, "encoder_q-layer.5": 1609.4897, "encoder_q-layer.6": 1519.6874, "encoder_q-layer.7": 1496.9017, "encoder_q-layer.8": 1555.1626, "encoder_q-layer.9": 1194.9524, "epoch": 0.72, "inbatch_neg_score": 0.4995, "inbatch_pos_score": 1.1777, "learning_rate": 1.4833333333333336e-05, "loss": 3.0752, "norm_diff": 0.0837, "norm_loss": 0.0, "num_token_doc": 66.8349, "num_token_overlap": 15.8649, "num_token_query": 42.3295, "num_token_union": 68.4563, "num_word_context": 202.3566, "num_word_doc": 49.8777, "num_word_query": 31.956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2512.7958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4995, "query_norm": 1.4414, "queue_k_norm": 1.5237, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3295, "sent_len_1": 66.8349, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.5275, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.1044, "doc_norm": 1.525, "encoder_q-embeddings": 1741.7283, "encoder_q-layer.0": 1126.7194, "encoder_q-layer.1": 1349.3123, "encoder_q-layer.10": 1336.0322, "encoder_q-layer.11": 2817.3496, "encoder_q-layer.2": 1576.6604, "encoder_q-layer.3": 1807.1185, "encoder_q-layer.4": 1926.1638, "encoder_q-layer.5": 2122.0, "encoder_q-layer.6": 2175.4573, "encoder_q-layer.7": 2135.5354, "encoder_q-layer.8": 1789.9586, "encoder_q-layer.9": 1252.5696, "epoch": 0.72, "inbatch_neg_score": 0.5004, "inbatch_pos_score": 1.1895, "learning_rate": 1.477777777777778e-05, "loss": 3.1044, "norm_diff": 0.0837, "norm_loss": 0.0, "num_token_doc": 66.8452, "num_token_overlap": 15.7927, "num_token_query": 42.2018, "num_token_union": 68.4592, "num_word_context": 202.3553, "num_word_doc": 49.8542, "num_word_query": 31.8407, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2740.7924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.502, "query_norm": 1.4413, "queue_k_norm": 1.5228, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2018, "sent_len_1": 66.8452, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9975, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.1011, "doc_norm": 1.5218, "encoder_q-embeddings": 982.6896, "encoder_q-layer.0": 676.5638, "encoder_q-layer.1": 700.7603, "encoder_q-layer.10": 1168.0754, "encoder_q-layer.11": 2896.2712, "encoder_q-layer.2": 792.4008, "encoder_q-layer.3": 820.0902, "encoder_q-layer.4": 885.5098, "encoder_q-layer.5": 903.7146, "encoder_q-layer.6": 1072.6199, "encoder_q-layer.7": 1143.5593, "encoder_q-layer.8": 1278.2368, "encoder_q-layer.9": 1177.8282, "epoch": 0.72, "inbatch_neg_score": 0.5046, "inbatch_pos_score": 1.1855, "learning_rate": 1.4722222222222224e-05, "loss": 3.1011, "norm_diff": 0.0809, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 15.7733, "num_token_query": 42.32, "num_token_union": 68.4384, "num_word_context": 202.3176, "num_word_doc": 49.7184, "num_word_query": 31.9549, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1899.0586, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5054, "query_norm": 1.441, "queue_k_norm": 1.5235, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.32, "sent_len_1": 66.6695, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.5213, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.0931, "doc_norm": 1.5257, "encoder_q-embeddings": 1308.0618, "encoder_q-layer.0": 858.3353, "encoder_q-layer.1": 935.0426, "encoder_q-layer.10": 1279.1525, "encoder_q-layer.11": 2861.0547, "encoder_q-layer.2": 1120.9161, "encoder_q-layer.3": 1196.4556, "encoder_q-layer.4": 1326.4535, "encoder_q-layer.5": 1351.9114, "encoder_q-layer.6": 1546.7526, "encoder_q-layer.7": 1401.9565, "encoder_q-layer.8": 1436.8398, "encoder_q-layer.9": 1279.8136, "epoch": 0.72, "inbatch_neg_score": 0.5067, "inbatch_pos_score": 1.2031, "learning_rate": 1.4666666666666668e-05, "loss": 3.0931, "norm_diff": 0.0795, "norm_loss": 0.0, "num_token_doc": 66.9033, "num_token_overlap": 15.837, "num_token_query": 42.277, "num_token_union": 68.5193, "num_word_context": 202.2941, "num_word_doc": 49.9403, "num_word_query": 31.9039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2150.2014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5063, "query_norm": 1.4462, "queue_k_norm": 1.523, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.277, "sent_len_1": 66.9033, "sent_len_max_0": 128.0, "sent_len_max_1": 188.275, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.072, "doc_norm": 1.524, "encoder_q-embeddings": 1010.6945, "encoder_q-layer.0": 658.9386, "encoder_q-layer.1": 685.0446, "encoder_q-layer.10": 1269.6868, "encoder_q-layer.11": 2855.5403, "encoder_q-layer.2": 780.8842, "encoder_q-layer.3": 834.287, "encoder_q-layer.4": 881.8499, "encoder_q-layer.5": 902.9487, "encoder_q-layer.6": 1071.9868, "encoder_q-layer.7": 1121.7903, "encoder_q-layer.8": 1280.3026, "encoder_q-layer.9": 1239.8347, "epoch": 0.72, "inbatch_neg_score": 0.5041, "inbatch_pos_score": 1.1875, "learning_rate": 1.4611111111111112e-05, "loss": 3.072, "norm_diff": 0.0607, "norm_loss": 0.0, "num_token_doc": 66.7144, "num_token_overlap": 15.8416, "num_token_query": 42.3065, "num_token_union": 68.4886, "num_word_context": 202.3807, "num_word_doc": 49.7964, "num_word_query": 31.97, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1904.1456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5039, "query_norm": 1.4633, "queue_k_norm": 1.5247, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3065, "sent_len_1": 66.7144, "sent_len_max_0": 127.9963, "sent_len_max_1": 187.6962, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.09, "doc_norm": 1.5273, "encoder_q-embeddings": 1121.2754, "encoder_q-layer.0": 716.5536, "encoder_q-layer.1": 768.059, "encoder_q-layer.10": 1207.5028, "encoder_q-layer.11": 2739.3354, "encoder_q-layer.2": 920.0419, "encoder_q-layer.3": 946.4979, "encoder_q-layer.4": 1094.5966, "encoder_q-layer.5": 1108.8613, "encoder_q-layer.6": 1230.5144, "encoder_q-layer.7": 1254.7271, "encoder_q-layer.8": 1357.3643, "encoder_q-layer.9": 1168.35, "epoch": 0.72, "inbatch_neg_score": 0.5097, "inbatch_pos_score": 1.2109, "learning_rate": 1.4555555555555556e-05, "loss": 3.09, "norm_diff": 0.0684, "norm_loss": 0.0, "num_token_doc": 66.3163, "num_token_overlap": 15.7601, "num_token_query": 42.2575, "num_token_union": 68.2341, "num_word_context": 201.7166, "num_word_doc": 49.4936, "num_word_query": 31.9296, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1955.2345, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5098, "query_norm": 1.4589, "queue_k_norm": 1.5216, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2575, "sent_len_1": 66.3163, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2525, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0931, "doc_norm": 1.52, "encoder_q-embeddings": 2249.9314, "encoder_q-layer.0": 1588.9965, "encoder_q-layer.1": 1600.1411, "encoder_q-layer.10": 1274.556, "encoder_q-layer.11": 2908.4709, "encoder_q-layer.2": 1922.5876, "encoder_q-layer.3": 1903.568, "encoder_q-layer.4": 2167.0566, "encoder_q-layer.5": 1974.7612, "encoder_q-layer.6": 2145.7131, "encoder_q-layer.7": 2078.3945, "encoder_q-layer.8": 1682.0546, "encoder_q-layer.9": 1267.5529, "epoch": 0.72, "inbatch_neg_score": 0.5079, "inbatch_pos_score": 1.1885, "learning_rate": 1.45e-05, "loss": 3.0931, "norm_diff": 0.0743, "norm_loss": 0.0, "num_token_doc": 66.5552, "num_token_overlap": 15.8108, "num_token_query": 42.3858, "num_token_union": 68.3815, "num_word_context": 202.2147, "num_word_doc": 49.6596, "num_word_query": 32.0363, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2950.8891, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5083, "query_norm": 1.4457, "queue_k_norm": 1.5245, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3858, "sent_len_1": 66.5552, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.57, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.082, "doc_norm": 1.5303, "encoder_q-embeddings": 1272.157, "encoder_q-layer.0": 823.2449, "encoder_q-layer.1": 903.9104, "encoder_q-layer.10": 1245.4789, "encoder_q-layer.11": 2863.0938, "encoder_q-layer.2": 1066.7644, "encoder_q-layer.3": 1145.8872, "encoder_q-layer.4": 1235.7351, "encoder_q-layer.5": 1297.1606, "encoder_q-layer.6": 1342.1782, "encoder_q-layer.7": 1406.1266, "encoder_q-layer.8": 1306.6417, "encoder_q-layer.9": 1120.2188, "epoch": 0.72, "inbatch_neg_score": 0.511, "inbatch_pos_score": 1.1816, "learning_rate": 1.4444444444444444e-05, "loss": 3.082, "norm_diff": 0.0751, "norm_loss": 0.0, "num_token_doc": 66.6909, "num_token_overlap": 15.8269, "num_token_query": 42.2487, "num_token_union": 68.4281, "num_word_context": 201.8732, "num_word_doc": 49.7785, "num_word_query": 31.9251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2098.8221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5122, "query_norm": 1.4552, "queue_k_norm": 1.524, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2487, "sent_len_1": 66.6909, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0938, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0841, "doc_norm": 1.5275, "encoder_q-embeddings": 1213.6768, "encoder_q-layer.0": 802.5245, "encoder_q-layer.1": 884.3963, "encoder_q-layer.10": 1253.4504, "encoder_q-layer.11": 2843.3325, "encoder_q-layer.2": 1037.452, "encoder_q-layer.3": 1093.0372, "encoder_q-layer.4": 1130.5909, "encoder_q-layer.5": 1176.9375, "encoder_q-layer.6": 1261.7723, "encoder_q-layer.7": 1246.8473, "encoder_q-layer.8": 1354.124, "encoder_q-layer.9": 1171.8394, "epoch": 0.72, "inbatch_neg_score": 0.5088, "inbatch_pos_score": 1.2012, "learning_rate": 1.438888888888889e-05, "loss": 3.0841, "norm_diff": 0.0773, "norm_loss": 0.0, "num_token_doc": 66.6186, "num_token_overlap": 15.7741, "num_token_query": 42.3408, "num_token_union": 68.4197, "num_word_context": 201.9443, "num_word_doc": 49.7153, "num_word_query": 31.9993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2017.9534, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5093, "query_norm": 1.4502, "queue_k_norm": 1.5263, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3408, "sent_len_1": 66.6186, "sent_len_max_0": 127.995, "sent_len_max_1": 190.8825, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0846, "doc_norm": 1.5283, "encoder_q-embeddings": 1036.6913, "encoder_q-layer.0": 693.6901, "encoder_q-layer.1": 724.348, "encoder_q-layer.10": 1450.2762, "encoder_q-layer.11": 3037.7681, "encoder_q-layer.2": 822.1238, "encoder_q-layer.3": 855.0604, "encoder_q-layer.4": 906.646, "encoder_q-layer.5": 950.4511, "encoder_q-layer.6": 1025.9548, "encoder_q-layer.7": 1115.5891, "encoder_q-layer.8": 1360.9346, "encoder_q-layer.9": 1234.6952, "epoch": 0.72, "inbatch_neg_score": 0.512, "inbatch_pos_score": 1.1787, "learning_rate": 1.4333333333333334e-05, "loss": 3.0846, "norm_diff": 0.0957, "norm_loss": 0.0, "num_token_doc": 66.9385, "num_token_overlap": 15.8134, "num_token_query": 42.48, "num_token_union": 68.6706, "num_word_context": 202.5979, "num_word_doc": 49.9391, "num_word_query": 32.0888, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1945.1904, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5112, "query_norm": 1.4326, "queue_k_norm": 1.5264, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.48, "sent_len_1": 66.9385, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.7075, "stdk": 0.0487, "stdq": 0.0443, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.1149, "doc_norm": 1.5244, "encoder_q-embeddings": 1507.0077, "encoder_q-layer.0": 1083.9128, "encoder_q-layer.1": 1193.5962, "encoder_q-layer.10": 1317.1482, "encoder_q-layer.11": 2921.9285, "encoder_q-layer.2": 1453.9795, "encoder_q-layer.3": 1457.9932, "encoder_q-layer.4": 1487.0767, "encoder_q-layer.5": 1617.0898, "encoder_q-layer.6": 1731.3447, "encoder_q-layer.7": 1842.4996, "encoder_q-layer.8": 1593.8663, "encoder_q-layer.9": 1210.6755, "epoch": 0.73, "inbatch_neg_score": 0.5123, "inbatch_pos_score": 1.1748, "learning_rate": 1.427777777777778e-05, "loss": 3.1149, "norm_diff": 0.089, "norm_loss": 0.0, "num_token_doc": 66.9626, "num_token_overlap": 15.7705, "num_token_query": 42.2196, "num_token_union": 68.5644, "num_word_context": 202.5149, "num_word_doc": 49.9788, "num_word_query": 31.8892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2459.0898, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5127, "query_norm": 1.4353, "queue_k_norm": 1.5237, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2196, "sent_len_1": 66.9626, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9363, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.1035, "doc_norm": 1.5284, "encoder_q-embeddings": 1912.8295, "encoder_q-layer.0": 1355.5562, "encoder_q-layer.1": 1605.6768, "encoder_q-layer.10": 1301.6766, "encoder_q-layer.11": 2797.4639, "encoder_q-layer.2": 1938.5813, "encoder_q-layer.3": 1875.8806, "encoder_q-layer.4": 1833.944, "encoder_q-layer.5": 1849.3401, "encoder_q-layer.6": 1868.7285, "encoder_q-layer.7": 1748.4886, "encoder_q-layer.8": 1651.0964, "encoder_q-layer.9": 1185.1798, "epoch": 0.73, "inbatch_neg_score": 0.5138, "inbatch_pos_score": 1.207, "learning_rate": 1.4222222222222224e-05, "loss": 3.1035, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.6651, "num_token_overlap": 15.8142, "num_token_query": 42.1972, "num_token_union": 68.353, "num_word_context": 202.282, "num_word_doc": 49.719, "num_word_query": 31.8794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2728.4293, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5146, "query_norm": 1.4494, "queue_k_norm": 1.5256, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1972, "sent_len_1": 66.6651, "sent_len_max_0": 128.0, "sent_len_max_1": 190.5075, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 53.0273, "active_queue_size": 16384.0, "cl_loss": 3.0762, "doc_norm": 1.5289, "encoder_q-embeddings": 1235.8673, "encoder_q-layer.0": 843.67, "encoder_q-layer.1": 949.144, "encoder_q-layer.10": 1229.3671, "encoder_q-layer.11": 3078.8577, "encoder_q-layer.2": 1019.0896, "encoder_q-layer.3": 1078.8397, "encoder_q-layer.4": 1110.9138, "encoder_q-layer.5": 1097.5651, "encoder_q-layer.6": 1215.3688, "encoder_q-layer.7": 1324.2849, "encoder_q-layer.8": 1350.0345, "encoder_q-layer.9": 1170.3782, "epoch": 0.73, "inbatch_neg_score": 0.5151, "inbatch_pos_score": 1.1777, "learning_rate": 1.4166666666666668e-05, "loss": 3.0762, "norm_diff": 0.0988, "norm_loss": 0.0, "num_token_doc": 66.6742, "num_token_overlap": 15.805, "num_token_query": 42.29, "num_token_union": 68.3705, "num_word_context": 201.9881, "num_word_doc": 49.738, "num_word_query": 31.9249, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2138.4071, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5166, "query_norm": 1.43, "queue_k_norm": 1.5247, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.29, "sent_len_1": 66.6742, "sent_len_max_0": 128.0, "sent_len_max_1": 188.2887, "stdk": 0.0488, "stdq": 0.0441, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0872, "doc_norm": 1.53, "encoder_q-embeddings": 6525.4805, "encoder_q-layer.0": 3764.1282, "encoder_q-layer.1": 3897.6345, "encoder_q-layer.10": 1176.3311, "encoder_q-layer.11": 2851.4399, "encoder_q-layer.2": 3832.5203, "encoder_q-layer.3": 3727.3076, "encoder_q-layer.4": 3219.6379, "encoder_q-layer.5": 2924.314, "encoder_q-layer.6": 2751.0872, "encoder_q-layer.7": 2471.9004, "encoder_q-layer.8": 2204.0042, "encoder_q-layer.9": 1406.0389, "epoch": 0.73, "inbatch_neg_score": 0.5177, "inbatch_pos_score": 1.2051, "learning_rate": 1.4111111111111112e-05, "loss": 3.0872, "norm_diff": 0.0766, "norm_loss": 0.0, "num_token_doc": 66.7987, "num_token_overlap": 15.7672, "num_token_query": 42.3371, "num_token_union": 68.5465, "num_word_context": 202.5511, "num_word_doc": 49.83, "num_word_query": 31.975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5556.5556, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5176, "query_norm": 1.4533, "queue_k_norm": 1.5269, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3371, "sent_len_1": 66.7987, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9925, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.0602, "doc_norm": 1.5247, "encoder_q-embeddings": 1012.7166, "encoder_q-layer.0": 660.9114, "encoder_q-layer.1": 709.2076, "encoder_q-layer.10": 1196.7428, "encoder_q-layer.11": 2708.6323, "encoder_q-layer.2": 794.2752, "encoder_q-layer.3": 816.7854, "encoder_q-layer.4": 858.4086, "encoder_q-layer.5": 922.445, "encoder_q-layer.6": 1053.9274, "encoder_q-layer.7": 1190.9763, "encoder_q-layer.8": 1309.4751, "encoder_q-layer.9": 1156.5923, "epoch": 0.73, "inbatch_neg_score": 0.518, "inbatch_pos_score": 1.2197, "learning_rate": 1.4055555555555556e-05, "loss": 3.0602, "norm_diff": 0.0557, "norm_loss": 0.0, "num_token_doc": 66.6873, "num_token_overlap": 15.8108, "num_token_query": 42.4324, "num_token_union": 68.4635, "num_word_context": 202.0195, "num_word_doc": 49.7473, "num_word_query": 32.0382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1845.4911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5171, "query_norm": 1.469, "queue_k_norm": 1.5255, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4324, "sent_len_1": 66.6873, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.9837, "stdk": 0.0485, "stdq": 0.0457, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.0987, "doc_norm": 1.5284, "encoder_q-embeddings": 1019.7711, "encoder_q-layer.0": 656.8214, "encoder_q-layer.1": 707.7091, "encoder_q-layer.10": 1206.3832, "encoder_q-layer.11": 2914.8218, "encoder_q-layer.2": 776.5919, "encoder_q-layer.3": 835.5172, "encoder_q-layer.4": 873.0744, "encoder_q-layer.5": 884.2229, "encoder_q-layer.6": 983.3077, "encoder_q-layer.7": 1112.2695, "encoder_q-layer.8": 1300.1046, "encoder_q-layer.9": 1175.4862, "epoch": 0.73, "inbatch_neg_score": 0.5185, "inbatch_pos_score": 1.2041, "learning_rate": 1.4000000000000001e-05, "loss": 3.0987, "norm_diff": 0.0809, "norm_loss": 0.0, "num_token_doc": 66.6446, "num_token_overlap": 15.7516, "num_token_query": 42.2488, "num_token_union": 68.4911, "num_word_context": 202.1136, "num_word_doc": 49.7237, "num_word_query": 31.9235, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1899.5463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5186, "query_norm": 1.4475, "queue_k_norm": 1.5286, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2488, "sent_len_1": 66.6446, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.5075, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.0855, "doc_norm": 1.5327, "encoder_q-embeddings": 1509.2034, "encoder_q-layer.0": 1007.2787, "encoder_q-layer.1": 1077.2865, "encoder_q-layer.10": 1271.1305, "encoder_q-layer.11": 2975.3904, "encoder_q-layer.2": 1227.8306, "encoder_q-layer.3": 1303.7982, "encoder_q-layer.4": 1385.8557, "encoder_q-layer.5": 1435.9176, "encoder_q-layer.6": 1554.0221, "encoder_q-layer.7": 1692.16, "encoder_q-layer.8": 1590.5022, "encoder_q-layer.9": 1239.1305, "epoch": 0.73, "inbatch_neg_score": 0.5185, "inbatch_pos_score": 1.1914, "learning_rate": 1.3944444444444446e-05, "loss": 3.0855, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 66.6904, "num_token_overlap": 15.8575, "num_token_query": 42.3734, "num_token_union": 68.4724, "num_word_context": 202.1658, "num_word_doc": 49.7878, "num_word_query": 31.9991, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2337.1183, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.519, "query_norm": 1.4578, "queue_k_norm": 1.528, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3734, "sent_len_1": 66.6904, "sent_len_max_0": 127.9912, "sent_len_max_1": 186.4062, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0834, "doc_norm": 1.5308, "encoder_q-embeddings": 1444.6802, "encoder_q-layer.0": 1018.4301, "encoder_q-layer.1": 1141.1056, "encoder_q-layer.10": 1229.9827, "encoder_q-layer.11": 2702.1399, "encoder_q-layer.2": 1270.5483, "encoder_q-layer.3": 1282.9054, "encoder_q-layer.4": 1354.0499, "encoder_q-layer.5": 1383.2795, "encoder_q-layer.6": 1457.4365, "encoder_q-layer.7": 1459.324, "encoder_q-layer.8": 1443.001, "encoder_q-layer.9": 1185.8983, "epoch": 0.73, "inbatch_neg_score": 0.5189, "inbatch_pos_score": 1.2168, "learning_rate": 1.388888888888889e-05, "loss": 3.0834, "norm_diff": 0.0638, "norm_loss": 0.0, "num_token_doc": 66.8261, "num_token_overlap": 15.8467, "num_token_query": 42.3096, "num_token_union": 68.5214, "num_word_context": 202.3463, "num_word_doc": 49.9036, "num_word_query": 31.9747, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2209.7603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.52, "query_norm": 1.467, "queue_k_norm": 1.5297, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3096, "sent_len_1": 66.8261, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.9062, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.0821, "doc_norm": 1.5301, "encoder_q-embeddings": 2356.0291, "encoder_q-layer.0": 1478.3065, "encoder_q-layer.1": 1559.0214, "encoder_q-layer.10": 2721.054, "encoder_q-layer.11": 5863.4995, "encoder_q-layer.2": 1746.9326, "encoder_q-layer.3": 1814.4962, "encoder_q-layer.4": 1949.6466, "encoder_q-layer.5": 2139.3542, "encoder_q-layer.6": 2308.9678, "encoder_q-layer.7": 2447.5667, "encoder_q-layer.8": 2693.8328, "encoder_q-layer.9": 2421.9719, "epoch": 0.73, "inbatch_neg_score": 0.5225, "inbatch_pos_score": 1.21, "learning_rate": 1.3833333333333334e-05, "loss": 3.0821, "norm_diff": 0.0705, "norm_loss": 0.0, "num_token_doc": 66.66, "num_token_overlap": 15.9152, "num_token_query": 42.6072, "num_token_union": 68.4842, "num_word_context": 202.0303, "num_word_doc": 49.7516, "num_word_query": 32.2022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4007.8657, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.4596, "queue_k_norm": 1.5294, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.6072, "sent_len_1": 66.66, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5962, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0934, "doc_norm": 1.5279, "encoder_q-embeddings": 3422.0183, "encoder_q-layer.0": 2435.2866, "encoder_q-layer.1": 2709.6672, "encoder_q-layer.10": 2431.5847, "encoder_q-layer.11": 5664.6787, "encoder_q-layer.2": 2989.6467, "encoder_q-layer.3": 3474.5015, "encoder_q-layer.4": 3612.3083, "encoder_q-layer.5": 3718.2336, "encoder_q-layer.6": 3880.9839, "encoder_q-layer.7": 3565.3621, "encoder_q-layer.8": 3168.2634, "encoder_q-layer.9": 2406.5894, "epoch": 0.73, "inbatch_neg_score": 0.5284, "inbatch_pos_score": 1.2158, "learning_rate": 1.3777777777777778e-05, "loss": 3.0934, "norm_diff": 0.0652, "norm_loss": 0.0, "num_token_doc": 66.8213, "num_token_overlap": 15.8253, "num_token_query": 42.407, "num_token_union": 68.5348, "num_word_context": 202.5882, "num_word_doc": 49.8446, "num_word_query": 32.0324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5139.5561, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5273, "query_norm": 1.4627, "queue_k_norm": 1.5284, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.407, "sent_len_1": 66.8213, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6362, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0766, "doc_norm": 1.5231, "encoder_q-embeddings": 2020.2495, "encoder_q-layer.0": 1308.1566, "encoder_q-layer.1": 1353.0653, "encoder_q-layer.10": 2799.0933, "encoder_q-layer.11": 5829.2397, "encoder_q-layer.2": 1537.2478, "encoder_q-layer.3": 1667.1865, "encoder_q-layer.4": 1794.7584, "encoder_q-layer.5": 1830.1243, "encoder_q-layer.6": 2007.2733, "encoder_q-layer.7": 2260.3044, "encoder_q-layer.8": 2558.8164, "encoder_q-layer.9": 2351.0376, "epoch": 0.74, "inbatch_neg_score": 0.5208, "inbatch_pos_score": 1.21, "learning_rate": 1.3722222222222222e-05, "loss": 3.0766, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.8467, "num_token_overlap": 15.8348, "num_token_query": 42.2722, "num_token_union": 68.3992, "num_word_context": 202.098, "num_word_doc": 49.8229, "num_word_query": 31.9207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3768.4759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5205, "query_norm": 1.4628, "queue_k_norm": 1.5284, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2722, "sent_len_1": 66.8467, "sent_len_max_0": 128.0, "sent_len_max_1": 193.31, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.0799, "doc_norm": 1.5302, "encoder_q-embeddings": 5209.7793, "encoder_q-layer.0": 3946.1372, "encoder_q-layer.1": 4432.3916, "encoder_q-layer.10": 2401.9402, "encoder_q-layer.11": 5580.0332, "encoder_q-layer.2": 4931.0674, "encoder_q-layer.3": 4984.084, "encoder_q-layer.4": 5134.1489, "encoder_q-layer.5": 5105.4048, "encoder_q-layer.6": 4950.938, "encoder_q-layer.7": 4084.8176, "encoder_q-layer.8": 3246.5674, "encoder_q-layer.9": 2414.4697, "epoch": 0.74, "inbatch_neg_score": 0.5213, "inbatch_pos_score": 1.2178, "learning_rate": 1.3666666666666666e-05, "loss": 3.0799, "norm_diff": 0.0699, "norm_loss": 0.0, "num_token_doc": 66.65, "num_token_overlap": 15.823, "num_token_query": 42.4166, "num_token_union": 68.4663, "num_word_context": 202.0852, "num_word_doc": 49.7288, "num_word_query": 32.0746, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6691.1736, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.522, "query_norm": 1.4603, "queue_k_norm": 1.5292, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4166, "sent_len_1": 66.65, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2625, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.0862, "doc_norm": 1.5337, "encoder_q-embeddings": 2189.0476, "encoder_q-layer.0": 1454.3372, "encoder_q-layer.1": 1565.0957, "encoder_q-layer.10": 2625.9041, "encoder_q-layer.11": 5837.3486, "encoder_q-layer.2": 1813.2957, "encoder_q-layer.3": 1841.0383, "encoder_q-layer.4": 1951.0347, "encoder_q-layer.5": 1980.6512, "encoder_q-layer.6": 2113.3645, "encoder_q-layer.7": 2219.8, "encoder_q-layer.8": 2646.0437, "encoder_q-layer.9": 2396.4465, "epoch": 0.74, "inbatch_neg_score": 0.5235, "inbatch_pos_score": 1.1953, "learning_rate": 1.3611111111111111e-05, "loss": 3.0862, "norm_diff": 0.0859, "norm_loss": 0.0, "num_token_doc": 66.7348, "num_token_overlap": 15.8105, "num_token_query": 42.3665, "num_token_union": 68.4907, "num_word_context": 202.1898, "num_word_doc": 49.8205, "num_word_query": 32.0199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3947.7238, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.4477, "queue_k_norm": 1.5304, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3665, "sent_len_1": 66.7348, "sent_len_max_0": 128.0, "sent_len_max_1": 189.77, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0924, "doc_norm": 1.5244, "encoder_q-embeddings": 1991.0989, "encoder_q-layer.0": 1336.1772, "encoder_q-layer.1": 1420.8044, "encoder_q-layer.10": 2578.1987, "encoder_q-layer.11": 5588.3833, "encoder_q-layer.2": 1674.5735, "encoder_q-layer.3": 1718.4415, "encoder_q-layer.4": 1824.8326, "encoder_q-layer.5": 1850.4354, "encoder_q-layer.6": 2071.4927, "encoder_q-layer.7": 2267.4255, "encoder_q-layer.8": 2473.1453, "encoder_q-layer.9": 2247.6169, "epoch": 0.74, "inbatch_neg_score": 0.5281, "inbatch_pos_score": 1.1982, "learning_rate": 1.3555555555555557e-05, "loss": 3.0924, "norm_diff": 0.0755, "norm_loss": 0.0, "num_token_doc": 66.8106, "num_token_overlap": 15.7686, "num_token_query": 42.1805, "num_token_union": 68.3957, "num_word_context": 202.196, "num_word_doc": 49.7996, "num_word_query": 31.8496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3750.7978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5269, "query_norm": 1.4489, "queue_k_norm": 1.5298, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1805, "sent_len_1": 66.8106, "sent_len_max_0": 128.0, "sent_len_max_1": 192.505, "stdk": 0.0484, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.063, "doc_norm": 1.5311, "encoder_q-embeddings": 2514.6265, "encoder_q-layer.0": 1682.6229, "encoder_q-layer.1": 1945.1846, "encoder_q-layer.10": 2577.3564, "encoder_q-layer.11": 5607.2769, "encoder_q-layer.2": 2473.8682, "encoder_q-layer.3": 2627.6868, "encoder_q-layer.4": 2860.5767, "encoder_q-layer.5": 2956.3137, "encoder_q-layer.6": 3228.4387, "encoder_q-layer.7": 3395.6084, "encoder_q-layer.8": 3315.2747, "encoder_q-layer.9": 2513.6628, "epoch": 0.74, "inbatch_neg_score": 0.5258, "inbatch_pos_score": 1.2188, "learning_rate": 1.3500000000000001e-05, "loss": 3.063, "norm_diff": 0.0661, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 15.8088, "num_token_query": 42.3233, "num_token_union": 68.4124, "num_word_context": 202.4112, "num_word_doc": 49.7491, "num_word_query": 31.9625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4538.4792, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5249, "query_norm": 1.4649, "queue_k_norm": 1.5305, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3233, "sent_len_1": 66.6695, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1863, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0643, "doc_norm": 1.5272, "encoder_q-embeddings": 2230.4644, "encoder_q-layer.0": 1485.9644, "encoder_q-layer.1": 1642.6969, "encoder_q-layer.10": 2495.1658, "encoder_q-layer.11": 5646.3086, "encoder_q-layer.2": 1806.3561, "encoder_q-layer.3": 1832.5612, "encoder_q-layer.4": 1914.0034, "encoder_q-layer.5": 2020.0774, "encoder_q-layer.6": 2064.1875, "encoder_q-layer.7": 2276.3474, "encoder_q-layer.8": 2592.1445, "encoder_q-layer.9": 2351.0737, "epoch": 0.74, "inbatch_neg_score": 0.5287, "inbatch_pos_score": 1.2139, "learning_rate": 1.3444444444444445e-05, "loss": 3.0643, "norm_diff": 0.0713, "norm_loss": 0.0, "num_token_doc": 66.7879, "num_token_overlap": 15.8517, "num_token_query": 42.3401, "num_token_union": 68.5093, "num_word_context": 202.5708, "num_word_doc": 49.8393, "num_word_query": 32.0023, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3881.3816, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5264, "query_norm": 1.4559, "queue_k_norm": 1.5314, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3401, "sent_len_1": 66.7879, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.2275, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.072, "doc_norm": 1.534, "encoder_q-embeddings": 3055.1064, "encoder_q-layer.0": 2100.4077, "encoder_q-layer.1": 2392.4053, "encoder_q-layer.10": 2619.3528, "encoder_q-layer.11": 5955.1274, "encoder_q-layer.2": 2755.3525, "encoder_q-layer.3": 2977.9612, "encoder_q-layer.4": 2894.1292, "encoder_q-layer.5": 3177.0569, "encoder_q-layer.6": 3302.7661, "encoder_q-layer.7": 3412.394, "encoder_q-layer.8": 3208.1963, "encoder_q-layer.9": 2462.4333, "epoch": 0.74, "inbatch_neg_score": 0.5271, "inbatch_pos_score": 1.1992, "learning_rate": 1.338888888888889e-05, "loss": 3.072, "norm_diff": 0.0849, "norm_loss": 0.0, "num_token_doc": 66.7047, "num_token_overlap": 15.8092, "num_token_query": 42.3567, "num_token_union": 68.4688, "num_word_context": 202.0024, "num_word_doc": 49.7843, "num_word_query": 32.0033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4882.3572, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5293, "query_norm": 1.4491, "queue_k_norm": 1.5302, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3567, "sent_len_1": 66.7047, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.8487, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0841, "doc_norm": 1.5346, "encoder_q-embeddings": 2176.5452, "encoder_q-layer.0": 1459.8802, "encoder_q-layer.1": 1549.6307, "encoder_q-layer.10": 2393.3179, "encoder_q-layer.11": 5565.1411, "encoder_q-layer.2": 1760.8661, "encoder_q-layer.3": 1825.9041, "encoder_q-layer.4": 1919.3562, "encoder_q-layer.5": 1890.7634, "encoder_q-layer.6": 2178.1746, "encoder_q-layer.7": 2423.176, "encoder_q-layer.8": 2610.2791, "encoder_q-layer.9": 2291.7925, "epoch": 0.74, "inbatch_neg_score": 0.5245, "inbatch_pos_score": 1.2139, "learning_rate": 1.3333333333333333e-05, "loss": 3.0841, "norm_diff": 0.0826, "norm_loss": 0.0, "num_token_doc": 66.7732, "num_token_overlap": 15.7839, "num_token_query": 42.2104, "num_token_union": 68.3937, "num_word_context": 202.3009, "num_word_doc": 49.8042, "num_word_query": 31.8673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3823.0491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5239, "query_norm": 1.4521, "queue_k_norm": 1.5315, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2104, "sent_len_1": 66.7732, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9375, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 3.0715, "doc_norm": 1.5274, "encoder_q-embeddings": 1077.5571, "encoder_q-layer.0": 738.5123, "encoder_q-layer.1": 801.7188, "encoder_q-layer.10": 1215.7406, "encoder_q-layer.11": 2784.4062, "encoder_q-layer.2": 921.6659, "encoder_q-layer.3": 981.5074, "encoder_q-layer.4": 1132.6527, "encoder_q-layer.5": 1096.4874, "encoder_q-layer.6": 1196.7865, "encoder_q-layer.7": 1425.2312, "encoder_q-layer.8": 1413.3859, "encoder_q-layer.9": 1167.3339, "epoch": 0.74, "inbatch_neg_score": 0.5271, "inbatch_pos_score": 1.2363, "learning_rate": 1.3277777777777777e-05, "loss": 3.0715, "norm_diff": 0.0701, "norm_loss": 0.0, "num_token_doc": 66.6886, "num_token_overlap": 15.852, "num_token_query": 42.2792, "num_token_union": 68.391, "num_word_context": 202.068, "num_word_doc": 49.7643, "num_word_query": 31.9447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1996.3612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5273, "query_norm": 1.4573, "queue_k_norm": 1.5308, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2792, "sent_len_1": 66.6886, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9525, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.0808, "doc_norm": 1.5343, "encoder_q-embeddings": 1644.201, "encoder_q-layer.0": 1163.7433, "encoder_q-layer.1": 1347.269, "encoder_q-layer.10": 1278.8452, "encoder_q-layer.11": 2810.7161, "encoder_q-layer.2": 1555.4486, "encoder_q-layer.3": 1744.6182, "encoder_q-layer.4": 1983.1608, "encoder_q-layer.5": 1935.6481, "encoder_q-layer.6": 1921.418, "encoder_q-layer.7": 1874.9578, "encoder_q-layer.8": 1706.8215, "encoder_q-layer.9": 1204.7015, "epoch": 0.74, "inbatch_neg_score": 0.5335, "inbatch_pos_score": 1.2344, "learning_rate": 1.3222222222222221e-05, "loss": 3.0808, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.5033, "num_token_overlap": 15.8249, "num_token_query": 42.2687, "num_token_union": 68.31, "num_word_context": 201.946, "num_word_doc": 49.6482, "num_word_query": 31.9158, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2610.2435, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5342, "query_norm": 1.4551, "queue_k_norm": 1.5305, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2687, "sent_len_1": 66.5033, "sent_len_max_0": 128.0, "sent_len_max_1": 189.125, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0739, "doc_norm": 1.5304, "encoder_q-embeddings": 2191.7515, "encoder_q-layer.0": 1469.9681, "encoder_q-layer.1": 1760.2195, "encoder_q-layer.10": 1297.5485, "encoder_q-layer.11": 2979.7048, "encoder_q-layer.2": 2111.0349, "encoder_q-layer.3": 2260.1968, "encoder_q-layer.4": 2226.5847, "encoder_q-layer.5": 1944.2695, "encoder_q-layer.6": 2033.892, "encoder_q-layer.7": 1946.594, "encoder_q-layer.8": 1684.8875, "encoder_q-layer.9": 1227.1471, "epoch": 0.74, "inbatch_neg_score": 0.5325, "inbatch_pos_score": 1.2041, "learning_rate": 1.3166666666666665e-05, "loss": 3.0739, "norm_diff": 0.0721, "norm_loss": 0.0, "num_token_doc": 66.7164, "num_token_overlap": 15.9076, "num_token_query": 42.4824, "num_token_union": 68.4628, "num_word_context": 202.187, "num_word_doc": 49.8151, "num_word_query": 32.1096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2976.2711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5332, "query_norm": 1.4583, "queue_k_norm": 1.5307, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4824, "sent_len_1": 66.7164, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8988, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 52.832, "active_queue_size": 16384.0, "cl_loss": 3.0986, "doc_norm": 1.5301, "encoder_q-embeddings": 1122.8947, "encoder_q-layer.0": 772.8031, "encoder_q-layer.1": 841.9973, "encoder_q-layer.10": 1299.7255, "encoder_q-layer.11": 2866.7024, "encoder_q-layer.2": 963.1198, "encoder_q-layer.3": 996.0787, "encoder_q-layer.4": 1032.1355, "encoder_q-layer.5": 1040.9696, "encoder_q-layer.6": 1137.2693, "encoder_q-layer.7": 1228.2039, "encoder_q-layer.8": 1377.9655, "encoder_q-layer.9": 1197.7543, "epoch": 0.75, "inbatch_neg_score": 0.5368, "inbatch_pos_score": 1.208, "learning_rate": 1.3111111111111113e-05, "loss": 3.0986, "norm_diff": 0.0729, "norm_loss": 0.0, "num_token_doc": 66.6539, "num_token_overlap": 15.8327, "num_token_query": 42.3976, "num_token_union": 68.4151, "num_word_context": 202.3923, "num_word_doc": 49.7201, "num_word_query": 32.0515, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2011.5449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5352, "query_norm": 1.4572, "queue_k_norm": 1.532, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3976, "sent_len_1": 66.6539, "sent_len_max_0": 128.0, "sent_len_max_1": 189.44, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0887, "doc_norm": 1.5343, "encoder_q-embeddings": 1638.1116, "encoder_q-layer.0": 1078.8517, "encoder_q-layer.1": 1134.1592, "encoder_q-layer.10": 1197.2976, "encoder_q-layer.11": 2881.7183, "encoder_q-layer.2": 1315.9414, "encoder_q-layer.3": 1382.9152, "encoder_q-layer.4": 1492.1539, "encoder_q-layer.5": 1513.2451, "encoder_q-layer.6": 1516.6179, "encoder_q-layer.7": 1477.7556, "encoder_q-layer.8": 1439.8413, "encoder_q-layer.9": 1169.6865, "epoch": 0.75, "inbatch_neg_score": 0.539, "inbatch_pos_score": 1.2197, "learning_rate": 1.3055555555555557e-05, "loss": 3.0887, "norm_diff": 0.0708, "norm_loss": 0.0, "num_token_doc": 66.7052, "num_token_overlap": 15.7668, "num_token_query": 42.2796, "num_token_union": 68.4773, "num_word_context": 202.4363, "num_word_doc": 49.7952, "num_word_query": 31.9361, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2366.1174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5386, "query_norm": 1.4635, "queue_k_norm": 1.5327, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2796, "sent_len_1": 66.7052, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.1037, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0745, "doc_norm": 1.5292, "encoder_q-embeddings": 1035.6858, "encoder_q-layer.0": 660.4666, "encoder_q-layer.1": 722.6246, "encoder_q-layer.10": 1235.3037, "encoder_q-layer.11": 2687.8311, "encoder_q-layer.2": 828.71, "encoder_q-layer.3": 894.266, "encoder_q-layer.4": 910.7661, "encoder_q-layer.5": 985.6338, "encoder_q-layer.6": 1089.1216, "encoder_q-layer.7": 1213.9435, "encoder_q-layer.8": 1323.9218, "encoder_q-layer.9": 1108.5906, "epoch": 0.75, "inbatch_neg_score": 0.5359, "inbatch_pos_score": 1.2432, "learning_rate": 1.3000000000000001e-05, "loss": 3.0745, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.684, "num_token_overlap": 15.8155, "num_token_query": 42.3172, "num_token_union": 68.4404, "num_word_context": 202.1869, "num_word_doc": 49.7082, "num_word_query": 31.9618, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1868.5176, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5352, "query_norm": 1.4672, "queue_k_norm": 1.5335, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3172, "sent_len_1": 66.684, "sent_len_max_0": 127.99, "sent_len_max_1": 192.43, "stdk": 0.0485, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0815, "doc_norm": 1.5365, "encoder_q-embeddings": 1109.5066, "encoder_q-layer.0": 771.7357, "encoder_q-layer.1": 869.1213, "encoder_q-layer.10": 1404.1322, "encoder_q-layer.11": 3085.4446, "encoder_q-layer.2": 983.4943, "encoder_q-layer.3": 1030.9578, "encoder_q-layer.4": 1041.6238, "encoder_q-layer.5": 1038.3308, "encoder_q-layer.6": 1137.1847, "encoder_q-layer.7": 1199.0684, "encoder_q-layer.8": 1286.4205, "encoder_q-layer.9": 1316.0613, "epoch": 0.75, "inbatch_neg_score": 0.5355, "inbatch_pos_score": 1.2285, "learning_rate": 1.2944444444444445e-05, "loss": 3.0815, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 15.829, "num_token_query": 42.263, "num_token_union": 68.4803, "num_word_context": 202.178, "num_word_doc": 49.887, "num_word_query": 31.9418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2013.0476, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5352, "query_norm": 1.4573, "queue_k_norm": 1.533, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.263, "sent_len_1": 66.8275, "sent_len_max_0": 128.0, "sent_len_max_1": 188.4, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.0717, "doc_norm": 1.536, "encoder_q-embeddings": 1256.4412, "encoder_q-layer.0": 879.1849, "encoder_q-layer.1": 1018.5477, "encoder_q-layer.10": 1286.8452, "encoder_q-layer.11": 2741.9502, "encoder_q-layer.2": 1179.8666, "encoder_q-layer.3": 1160.1206, "encoder_q-layer.4": 1194.8003, "encoder_q-layer.5": 1120.4836, "encoder_q-layer.6": 1341.4775, "encoder_q-layer.7": 1282.1093, "encoder_q-layer.8": 1259.339, "encoder_q-layer.9": 1159.2323, "epoch": 0.75, "inbatch_neg_score": 0.5355, "inbatch_pos_score": 1.2139, "learning_rate": 1.2888888888888889e-05, "loss": 3.0717, "norm_diff": 0.0849, "norm_loss": 0.0, "num_token_doc": 66.6708, "num_token_overlap": 15.8272, "num_token_query": 42.3491, "num_token_union": 68.3935, "num_word_context": 202.12, "num_word_doc": 49.7147, "num_word_query": 31.9833, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2061.7378, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5352, "query_norm": 1.4511, "queue_k_norm": 1.5336, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3491, "sent_len_1": 66.6708, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8738, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0643, "doc_norm": 1.5389, "encoder_q-embeddings": 1259.7593, "encoder_q-layer.0": 879.1061, "encoder_q-layer.1": 982.7253, "encoder_q-layer.10": 1292.626, "encoder_q-layer.11": 2953.9614, "encoder_q-layer.2": 1083.1514, "encoder_q-layer.3": 1103.5938, "encoder_q-layer.4": 1139.0499, "encoder_q-layer.5": 1166.7987, "encoder_q-layer.6": 1254.8188, "encoder_q-layer.7": 1224.5918, "encoder_q-layer.8": 1313.1396, "encoder_q-layer.9": 1134.2258, "epoch": 0.75, "inbatch_neg_score": 0.5381, "inbatch_pos_score": 1.2168, "learning_rate": 1.2833333333333333e-05, "loss": 3.0643, "norm_diff": 0.0693, "norm_loss": 0.0, "num_token_doc": 66.7903, "num_token_overlap": 15.8647, "num_token_query": 42.4892, "num_token_union": 68.5554, "num_word_context": 202.5687, "num_word_doc": 49.8431, "num_word_query": 32.0757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2122.7872, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5381, "query_norm": 1.4696, "queue_k_norm": 1.5332, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4892, "sent_len_1": 66.7903, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0938, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.0765, "doc_norm": 1.5334, "encoder_q-embeddings": 1952.8149, "encoder_q-layer.0": 1368.8395, "encoder_q-layer.1": 1645.2327, "encoder_q-layer.10": 1320.6399, "encoder_q-layer.11": 2948.1799, "encoder_q-layer.2": 1878.0625, "encoder_q-layer.3": 1911.1086, "encoder_q-layer.4": 1854.1183, "encoder_q-layer.5": 1791.3446, "encoder_q-layer.6": 1671.5432, "encoder_q-layer.7": 1648.5455, "encoder_q-layer.8": 1459.7595, "encoder_q-layer.9": 1310.6283, "epoch": 0.75, "inbatch_neg_score": 0.5424, "inbatch_pos_score": 1.2119, "learning_rate": 1.2777777777777777e-05, "loss": 3.0765, "norm_diff": 0.0649, "norm_loss": 0.0, "num_token_doc": 66.7455, "num_token_overlap": 15.8101, "num_token_query": 42.3092, "num_token_union": 68.4784, "num_word_context": 202.3282, "num_word_doc": 49.8453, "num_word_query": 31.9621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2727.2797, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5425, "query_norm": 1.4685, "queue_k_norm": 1.5357, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3092, "sent_len_1": 66.7455, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.4588, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.085, "doc_norm": 1.5361, "encoder_q-embeddings": 2018.9131, "encoder_q-layer.0": 1317.7946, "encoder_q-layer.1": 1494.5125, "encoder_q-layer.10": 1500.0239, "encoder_q-layer.11": 3109.5886, "encoder_q-layer.2": 1868.2361, "encoder_q-layer.3": 1865.8079, "encoder_q-layer.4": 2082.0798, "encoder_q-layer.5": 2119.2356, "encoder_q-layer.6": 1968.5042, "encoder_q-layer.7": 1897.9799, "encoder_q-layer.8": 1803.3357, "encoder_q-layer.9": 1357.6006, "epoch": 0.75, "inbatch_neg_score": 0.5426, "inbatch_pos_score": 1.2197, "learning_rate": 1.2722222222222221e-05, "loss": 3.085, "norm_diff": 0.0675, "norm_loss": 0.0, "num_token_doc": 66.7191, "num_token_overlap": 15.7542, "num_token_query": 42.1621, "num_token_union": 68.4219, "num_word_context": 202.0523, "num_word_doc": 49.7476, "num_word_query": 31.8279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2852.961, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.542, "query_norm": 1.4685, "queue_k_norm": 1.5367, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1621, "sent_len_1": 66.7191, "sent_len_max_0": 127.9912, "sent_len_max_1": 189.29, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0704, "doc_norm": 1.5319, "encoder_q-embeddings": 1266.3833, "encoder_q-layer.0": 835.0628, "encoder_q-layer.1": 943.4744, "encoder_q-layer.10": 1337.2095, "encoder_q-layer.11": 2871.4307, "encoder_q-layer.2": 1035.8928, "encoder_q-layer.3": 1082.0209, "encoder_q-layer.4": 1120.0326, "encoder_q-layer.5": 1225.0833, "encoder_q-layer.6": 1328.3114, "encoder_q-layer.7": 1324.8177, "encoder_q-layer.8": 1370.9061, "encoder_q-layer.9": 1239.9407, "epoch": 0.75, "inbatch_neg_score": 0.5432, "inbatch_pos_score": 1.2646, "learning_rate": 1.2666666666666668e-05, "loss": 3.0704, "norm_diff": 0.0367, "norm_loss": 0.0, "num_token_doc": 66.6741, "num_token_overlap": 15.8452, "num_token_query": 42.464, "num_token_union": 68.4918, "num_word_context": 202.6308, "num_word_doc": 49.7482, "num_word_query": 32.1046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2087.1372, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5439, "query_norm": 1.4951, "queue_k_norm": 1.535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.464, "sent_len_1": 66.6741, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7688, "stdk": 0.0485, "stdq": 0.0462, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0695, "doc_norm": 1.5406, "encoder_q-embeddings": 1235.2981, "encoder_q-layer.0": 819.1303, "encoder_q-layer.1": 944.1814, "encoder_q-layer.10": 1137.4443, "encoder_q-layer.11": 2839.1353, "encoder_q-layer.2": 1105.5371, "encoder_q-layer.3": 1216.6556, "encoder_q-layer.4": 1249.5604, "encoder_q-layer.5": 1231.9664, "encoder_q-layer.6": 1281.9985, "encoder_q-layer.7": 1359.2769, "encoder_q-layer.8": 1399.0264, "encoder_q-layer.9": 1174.8708, "epoch": 0.75, "inbatch_neg_score": 0.5428, "inbatch_pos_score": 1.2305, "learning_rate": 1.2611111111111113e-05, "loss": 3.0695, "norm_diff": 0.0654, "norm_loss": 0.0, "num_token_doc": 66.8322, "num_token_overlap": 15.8434, "num_token_query": 42.4411, "num_token_union": 68.5368, "num_word_context": 202.3685, "num_word_doc": 49.8148, "num_word_query": 32.0364, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2103.1963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.543, "query_norm": 1.4752, "queue_k_norm": 1.5352, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4411, "sent_len_1": 66.8322, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4875, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0832, "doc_norm": 1.5322, "encoder_q-embeddings": 1473.0394, "encoder_q-layer.0": 1015.5953, "encoder_q-layer.1": 1202.8276, "encoder_q-layer.10": 1161.8575, "encoder_q-layer.11": 2693.6602, "encoder_q-layer.2": 1409.0938, "encoder_q-layer.3": 1395.9512, "encoder_q-layer.4": 1438.6747, "encoder_q-layer.5": 1449.1288, "encoder_q-layer.6": 1680.2609, "encoder_q-layer.7": 1651.5734, "encoder_q-layer.8": 1522.8439, "encoder_q-layer.9": 1244.7953, "epoch": 0.76, "inbatch_neg_score": 0.5484, "inbatch_pos_score": 1.2256, "learning_rate": 1.2555555555555557e-05, "loss": 3.0832, "norm_diff": 0.06, "norm_loss": 0.0, "num_token_doc": 66.928, "num_token_overlap": 15.8413, "num_token_query": 42.2687, "num_token_union": 68.5395, "num_word_context": 202.4318, "num_word_doc": 49.9481, "num_word_query": 31.9195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2314.0463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5479, "query_norm": 1.4722, "queue_k_norm": 1.5354, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2687, "sent_len_1": 66.928, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2775, "stdk": 0.0485, "stdq": 0.0452, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0888, "doc_norm": 1.5386, "encoder_q-embeddings": 1075.0856, "encoder_q-layer.0": 722.3475, "encoder_q-layer.1": 796.335, "encoder_q-layer.10": 1188.5334, "encoder_q-layer.11": 2761.2983, "encoder_q-layer.2": 934.7231, "encoder_q-layer.3": 1003.687, "encoder_q-layer.4": 1058.0686, "encoder_q-layer.5": 1111.8768, "encoder_q-layer.6": 1139.3246, "encoder_q-layer.7": 1223.2184, "encoder_q-layer.8": 1266.3142, "encoder_q-layer.9": 1158.8984, "epoch": 0.76, "inbatch_neg_score": 0.5462, "inbatch_pos_score": 1.2383, "learning_rate": 1.25e-05, "loss": 3.0888, "norm_diff": 0.082, "norm_loss": 0.0, "num_token_doc": 66.8377, "num_token_overlap": 15.8504, "num_token_query": 42.3896, "num_token_union": 68.4929, "num_word_context": 202.5112, "num_word_doc": 49.8475, "num_word_query": 32.026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1952.1614, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5454, "query_norm": 1.4566, "queue_k_norm": 1.5364, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3896, "sent_len_1": 66.8377, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8837, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.0574, "doc_norm": 1.5311, "encoder_q-embeddings": 1183.484, "encoder_q-layer.0": 822.2183, "encoder_q-layer.1": 896.7289, "encoder_q-layer.10": 1346.0828, "encoder_q-layer.11": 3053.5679, "encoder_q-layer.2": 1028.4414, "encoder_q-layer.3": 1059.7343, "encoder_q-layer.4": 1127.5757, "encoder_q-layer.5": 1191.8976, "encoder_q-layer.6": 1213.6163, "encoder_q-layer.7": 1299.9171, "encoder_q-layer.8": 1377.1807, "encoder_q-layer.9": 1236.7731, "epoch": 0.76, "inbatch_neg_score": 0.5493, "inbatch_pos_score": 1.2109, "learning_rate": 1.2444444444444445e-05, "loss": 3.0574, "norm_diff": 0.0537, "norm_loss": 0.0, "num_token_doc": 66.7451, "num_token_overlap": 15.8394, "num_token_query": 42.4565, "num_token_union": 68.5594, "num_word_context": 202.0008, "num_word_doc": 49.8088, "num_word_query": 32.0819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2102.0625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5508, "query_norm": 1.4773, "queue_k_norm": 1.5372, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4565, "sent_len_1": 66.7451, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.4225, "stdk": 0.0484, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0899, "doc_norm": 1.5335, "encoder_q-embeddings": 1064.2291, "encoder_q-layer.0": 718.1705, "encoder_q-layer.1": 751.4459, "encoder_q-layer.10": 1153.5995, "encoder_q-layer.11": 2796.5566, "encoder_q-layer.2": 874.179, "encoder_q-layer.3": 924.0533, "encoder_q-layer.4": 1016.5129, "encoder_q-layer.5": 1047.6945, "encoder_q-layer.6": 1119.9204, "encoder_q-layer.7": 1156.8281, "encoder_q-layer.8": 1248.8906, "encoder_q-layer.9": 1115.0841, "epoch": 0.76, "inbatch_neg_score": 0.5516, "inbatch_pos_score": 1.207, "learning_rate": 1.238888888888889e-05, "loss": 3.0899, "norm_diff": 0.0793, "norm_loss": 0.0, "num_token_doc": 66.8327, "num_token_overlap": 15.7508, "num_token_query": 42.2052, "num_token_union": 68.4679, "num_word_context": 202.2104, "num_word_doc": 49.8903, "num_word_query": 31.8633, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1916.2855, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5508, "query_norm": 1.4542, "queue_k_norm": 1.5377, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2052, "sent_len_1": 66.8327, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.6838, "stdk": 0.0485, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0685, "doc_norm": 1.536, "encoder_q-embeddings": 1555.5665, "encoder_q-layer.0": 1068.797, "encoder_q-layer.1": 1182.0481, "encoder_q-layer.10": 1205.7292, "encoder_q-layer.11": 2848.9197, "encoder_q-layer.2": 1422.89, "encoder_q-layer.3": 1437.0841, "encoder_q-layer.4": 1557.3132, "encoder_q-layer.5": 1573.6201, "encoder_q-layer.6": 1742.3322, "encoder_q-layer.7": 1776.1945, "encoder_q-layer.8": 1651.0474, "encoder_q-layer.9": 1287.0129, "epoch": 0.76, "inbatch_neg_score": 0.554, "inbatch_pos_score": 1.2373, "learning_rate": 1.2333333333333334e-05, "loss": 3.0685, "norm_diff": 0.066, "norm_loss": 0.0, "num_token_doc": 66.7279, "num_token_overlap": 15.7597, "num_token_query": 42.3071, "num_token_union": 68.513, "num_word_context": 202.2406, "num_word_doc": 49.8433, "num_word_query": 31.9418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2456.2161, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5537, "query_norm": 1.47, "queue_k_norm": 1.539, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3071, "sent_len_1": 66.7279, "sent_len_max_0": 127.995, "sent_len_max_1": 188.9038, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0827, "doc_norm": 1.5382, "encoder_q-embeddings": 1176.9437, "encoder_q-layer.0": 801.2687, "encoder_q-layer.1": 939.5188, "encoder_q-layer.10": 1199.3418, "encoder_q-layer.11": 2823.8669, "encoder_q-layer.2": 1079.7545, "encoder_q-layer.3": 1156.0714, "encoder_q-layer.4": 1202.4448, "encoder_q-layer.5": 1297.6017, "encoder_q-layer.6": 1335.1954, "encoder_q-layer.7": 1386.4447, "encoder_q-layer.8": 1422.5513, "encoder_q-layer.9": 1159.6567, "epoch": 0.76, "inbatch_neg_score": 0.5602, "inbatch_pos_score": 1.2441, "learning_rate": 1.2277777777777778e-05, "loss": 3.0827, "norm_diff": 0.0612, "norm_loss": 0.0, "num_token_doc": 66.5475, "num_token_overlap": 15.8028, "num_token_query": 42.2319, "num_token_union": 68.3018, "num_word_context": 202.0723, "num_word_doc": 49.6411, "num_word_query": 31.8942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2081.5051, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.477, "queue_k_norm": 1.5361, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2319, "sent_len_1": 66.5475, "sent_len_max_0": 127.985, "sent_len_max_1": 190.3212, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0655, "doc_norm": 1.5403, "encoder_q-embeddings": 1651.7289, "encoder_q-layer.0": 1122.9822, "encoder_q-layer.1": 1331.5414, "encoder_q-layer.10": 1259.8809, "encoder_q-layer.11": 2872.9126, "encoder_q-layer.2": 1541.8943, "encoder_q-layer.3": 1624.2313, "encoder_q-layer.4": 1704.7095, "encoder_q-layer.5": 1684.8365, "encoder_q-layer.6": 1452.4476, "encoder_q-layer.7": 1458.8796, "encoder_q-layer.8": 1402.2208, "encoder_q-layer.9": 1211.0859, "epoch": 0.76, "inbatch_neg_score": 0.5611, "inbatch_pos_score": 1.2598, "learning_rate": 1.2222222222222222e-05, "loss": 3.0655, "norm_diff": 0.0734, "norm_loss": 0.0, "num_token_doc": 66.6928, "num_token_overlap": 15.8168, "num_token_query": 42.3563, "num_token_union": 68.4584, "num_word_context": 202.1223, "num_word_doc": 49.8009, "num_word_query": 32.0174, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2436.1477, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.561, "query_norm": 1.4669, "queue_k_norm": 1.54, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3563, "sent_len_1": 66.6928, "sent_len_max_0": 128.0, "sent_len_max_1": 188.455, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.0775, "doc_norm": 1.5407, "encoder_q-embeddings": 5546.9141, "encoder_q-layer.0": 3836.6487, "encoder_q-layer.1": 4098.3652, "encoder_q-layer.10": 2424.6169, "encoder_q-layer.11": 5770.2388, "encoder_q-layer.2": 4990.1479, "encoder_q-layer.3": 4926.9106, "encoder_q-layer.4": 5601.1577, "encoder_q-layer.5": 6105.0571, "encoder_q-layer.6": 5698.084, "encoder_q-layer.7": 5607.7598, "encoder_q-layer.8": 4640.5215, "encoder_q-layer.9": 2822.1587, "epoch": 0.76, "inbatch_neg_score": 0.5593, "inbatch_pos_score": 1.2324, "learning_rate": 1.2166666666666668e-05, "loss": 3.0775, "norm_diff": 0.0778, "norm_loss": 0.0, "num_token_doc": 66.7293, "num_token_overlap": 15.8843, "num_token_query": 42.4949, "num_token_union": 68.4849, "num_word_context": 202.093, "num_word_doc": 49.7703, "num_word_query": 32.093, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7438.0649, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5586, "query_norm": 1.4629, "queue_k_norm": 1.5389, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4949, "sent_len_1": 66.7293, "sent_len_max_0": 127.99, "sent_len_max_1": 190.1525, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.0798, "doc_norm": 1.5365, "encoder_q-embeddings": 2841.8503, "encoder_q-layer.0": 1933.8291, "encoder_q-layer.1": 2132.8784, "encoder_q-layer.10": 2438.071, "encoder_q-layer.11": 5721.2324, "encoder_q-layer.2": 2495.7969, "encoder_q-layer.3": 2573.3464, "encoder_q-layer.4": 2623.0396, "encoder_q-layer.5": 2718.8464, "encoder_q-layer.6": 2845.8689, "encoder_q-layer.7": 3140.3757, "encoder_q-layer.8": 3246.4473, "encoder_q-layer.9": 2573.4509, "epoch": 0.76, "inbatch_neg_score": 0.561, "inbatch_pos_score": 1.2227, "learning_rate": 1.2111111111111112e-05, "loss": 3.0798, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 66.9216, "num_token_overlap": 15.8315, "num_token_query": 42.2673, "num_token_union": 68.4933, "num_word_context": 202.1706, "num_word_doc": 49.9096, "num_word_query": 31.932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4581.2324, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.562, "query_norm": 1.4595, "queue_k_norm": 1.5424, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2673, "sent_len_1": 66.9216, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1175, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.067, "doc_norm": 1.5396, "encoder_q-embeddings": 2561.8579, "encoder_q-layer.0": 1746.9408, "encoder_q-layer.1": 1993.2572, "encoder_q-layer.10": 1202.9053, "encoder_q-layer.11": 2927.009, "encoder_q-layer.2": 2517.9326, "encoder_q-layer.3": 2664.7981, "encoder_q-layer.4": 2848.5757, "encoder_q-layer.5": 2781.4661, "encoder_q-layer.6": 2533.2515, "encoder_q-layer.7": 2125.7092, "encoder_q-layer.8": 1606.0831, "encoder_q-layer.9": 1271.6266, "epoch": 0.76, "inbatch_neg_score": 0.5626, "inbatch_pos_score": 1.2529, "learning_rate": 1.2055555555555556e-05, "loss": 3.067, "norm_diff": 0.0705, "norm_loss": 0.0, "num_token_doc": 66.7575, "num_token_overlap": 15.7685, "num_token_query": 42.3054, "num_token_union": 68.535, "num_word_context": 202.3825, "num_word_doc": 49.844, "num_word_query": 31.9821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3439.3386, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5625, "query_norm": 1.4691, "queue_k_norm": 1.5414, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3054, "sent_len_1": 66.7575, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.0037, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.0804, "doc_norm": 1.5467, "encoder_q-embeddings": 1185.749, "encoder_q-layer.0": 840.8614, "encoder_q-layer.1": 895.1301, "encoder_q-layer.10": 1221.4557, "encoder_q-layer.11": 2826.8491, "encoder_q-layer.2": 1010.4059, "encoder_q-layer.3": 1049.6439, "encoder_q-layer.4": 1084.4669, "encoder_q-layer.5": 1094.3447, "encoder_q-layer.6": 1170.2975, "encoder_q-layer.7": 1280.0779, "encoder_q-layer.8": 1336.7484, "encoder_q-layer.9": 1188.646, "epoch": 0.77, "inbatch_neg_score": 0.5644, "inbatch_pos_score": 1.2568, "learning_rate": 1.2e-05, "loss": 3.0804, "norm_diff": 0.0822, "norm_loss": 0.0, "num_token_doc": 66.6391, "num_token_overlap": 15.7864, "num_token_query": 42.2057, "num_token_union": 68.3174, "num_word_context": 202.1622, "num_word_doc": 49.672, "num_word_query": 31.8784, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2024.6337, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5645, "query_norm": 1.4645, "queue_k_norm": 1.542, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2057, "sent_len_1": 66.6391, "sent_len_max_0": 127.9975, "sent_len_max_1": 191.5525, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.0653, "doc_norm": 1.5426, "encoder_q-embeddings": 1852.333, "encoder_q-layer.0": 1270.1019, "encoder_q-layer.1": 1367.5657, "encoder_q-layer.10": 1212.3353, "encoder_q-layer.11": 2783.9036, "encoder_q-layer.2": 1511.3876, "encoder_q-layer.3": 1593.6702, "encoder_q-layer.4": 1707.0265, "encoder_q-layer.5": 1826.6504, "encoder_q-layer.6": 2127.04, "encoder_q-layer.7": 2155.3928, "encoder_q-layer.8": 1799.9662, "encoder_q-layer.9": 1257.8085, "epoch": 0.77, "inbatch_neg_score": 0.562, "inbatch_pos_score": 1.2441, "learning_rate": 1.1944444444444446e-05, "loss": 3.0653, "norm_diff": 0.0792, "norm_loss": 0.0, "num_token_doc": 66.8168, "num_token_overlap": 15.8206, "num_token_query": 42.2975, "num_token_union": 68.4922, "num_word_context": 202.5218, "num_word_doc": 49.8522, "num_word_query": 31.9414, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2670.9398, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.4634, "queue_k_norm": 1.5413, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2975, "sent_len_1": 66.8168, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.1975, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.069, "doc_norm": 1.5483, "encoder_q-embeddings": 1259.1265, "encoder_q-layer.0": 848.3611, "encoder_q-layer.1": 891.9215, "encoder_q-layer.10": 1295.8174, "encoder_q-layer.11": 3090.7576, "encoder_q-layer.2": 1048.6163, "encoder_q-layer.3": 1059.771, "encoder_q-layer.4": 1145.0406, "encoder_q-layer.5": 1224.2881, "encoder_q-layer.6": 1229.8074, "encoder_q-layer.7": 1262.0522, "encoder_q-layer.8": 1306.624, "encoder_q-layer.9": 1151.3649, "epoch": 0.77, "inbatch_neg_score": 0.5641, "inbatch_pos_score": 1.2285, "learning_rate": 1.188888888888889e-05, "loss": 3.069, "norm_diff": 0.0863, "norm_loss": 0.0, "num_token_doc": 66.824, "num_token_overlap": 15.8375, "num_token_query": 42.4137, "num_token_union": 68.5475, "num_word_context": 202.6666, "num_word_doc": 49.8762, "num_word_query": 32.0554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2106.8929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.564, "query_norm": 1.4619, "queue_k_norm": 1.5426, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4137, "sent_len_1": 66.824, "sent_len_max_0": 127.995, "sent_len_max_1": 191.88, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.0561, "doc_norm": 1.5428, "encoder_q-embeddings": 1189.3531, "encoder_q-layer.0": 786.4324, "encoder_q-layer.1": 880.5868, "encoder_q-layer.10": 1226.8844, "encoder_q-layer.11": 2993.0884, "encoder_q-layer.2": 975.2543, "encoder_q-layer.3": 1052.3302, "encoder_q-layer.4": 1115.2585, "encoder_q-layer.5": 1173.4576, "encoder_q-layer.6": 1319.9525, "encoder_q-layer.7": 1319.9612, "encoder_q-layer.8": 1408.7233, "encoder_q-layer.9": 1175.1687, "epoch": 0.77, "inbatch_neg_score": 0.562, "inbatch_pos_score": 1.252, "learning_rate": 1.1833333333333334e-05, "loss": 3.0561, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 66.4093, "num_token_overlap": 15.8546, "num_token_query": 42.6051, "num_token_union": 68.3894, "num_word_context": 202.2248, "num_word_doc": 49.5542, "num_word_query": 32.1579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2053.226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.563, "query_norm": 1.4658, "queue_k_norm": 1.5422, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.6051, "sent_len_1": 66.4093, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.0362, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.0803, "doc_norm": 1.5434, "encoder_q-embeddings": 967.1479, "encoder_q-layer.0": 625.4603, "encoder_q-layer.1": 647.565, "encoder_q-layer.10": 1320.8717, "encoder_q-layer.11": 2944.3193, "encoder_q-layer.2": 701.9677, "encoder_q-layer.3": 742.9552, "encoder_q-layer.4": 777.4987, "encoder_q-layer.5": 836.4417, "encoder_q-layer.6": 920.8325, "encoder_q-layer.7": 1035.5104, "encoder_q-layer.8": 1262.1637, "encoder_q-layer.9": 1177.8383, "epoch": 0.77, "inbatch_neg_score": 0.5626, "inbatch_pos_score": 1.2539, "learning_rate": 1.1777777777777778e-05, "loss": 3.0803, "norm_diff": 0.0621, "norm_loss": 0.0, "num_token_doc": 67.072, "num_token_overlap": 15.8845, "num_token_query": 42.4936, "num_token_union": 68.7092, "num_word_context": 202.4763, "num_word_doc": 50.1175, "num_word_query": 32.0866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1824.9844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.562, "query_norm": 1.4813, "queue_k_norm": 1.544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4936, "sent_len_1": 67.072, "sent_len_max_0": 128.0, "sent_len_max_1": 188.5387, "stdk": 0.0487, "stdq": 0.0458, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0537, "doc_norm": 1.5459, "encoder_q-embeddings": 4721.3267, "encoder_q-layer.0": 3863.0295, "encoder_q-layer.1": 3963.248, "encoder_q-layer.10": 1249.0177, "encoder_q-layer.11": 2820.8782, "encoder_q-layer.2": 3945.8516, "encoder_q-layer.3": 3756.0674, "encoder_q-layer.4": 3497.2566, "encoder_q-layer.5": 3204.3997, "encoder_q-layer.6": 3479.8845, "encoder_q-layer.7": 2868.1738, "encoder_q-layer.8": 1923.7528, "encoder_q-layer.9": 1255.1108, "epoch": 0.77, "inbatch_neg_score": 0.5621, "inbatch_pos_score": 1.2559, "learning_rate": 1.1722222222222224e-05, "loss": 3.0537, "norm_diff": 0.078, "norm_loss": 0.0, "num_token_doc": 66.7032, "num_token_overlap": 15.8219, "num_token_query": 42.3277, "num_token_union": 68.4565, "num_word_context": 202.303, "num_word_doc": 49.7894, "num_word_query": 31.9376, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5141.356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.562, "query_norm": 1.4678, "queue_k_norm": 1.5443, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3277, "sent_len_1": 66.7032, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4062, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.0531, "doc_norm": 1.544, "encoder_q-embeddings": 1339.2571, "encoder_q-layer.0": 894.3767, "encoder_q-layer.1": 953.0516, "encoder_q-layer.10": 1275.0497, "encoder_q-layer.11": 2926.8367, "encoder_q-layer.2": 1117.467, "encoder_q-layer.3": 1210.9807, "encoder_q-layer.4": 1327.4263, "encoder_q-layer.5": 1491.3883, "encoder_q-layer.6": 1556.178, "encoder_q-layer.7": 1536.6813, "encoder_q-layer.8": 1492.8976, "encoder_q-layer.9": 1269.8201, "epoch": 0.77, "inbatch_neg_score": 0.5632, "inbatch_pos_score": 1.248, "learning_rate": 1.1666666666666668e-05, "loss": 3.0531, "norm_diff": 0.0808, "norm_loss": 0.0, "num_token_doc": 66.8344, "num_token_overlap": 15.89, "num_token_query": 42.4449, "num_token_union": 68.5204, "num_word_context": 202.4237, "num_word_doc": 49.8464, "num_word_query": 32.0962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2233.0052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.562, "query_norm": 1.4632, "queue_k_norm": 1.545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4449, "sent_len_1": 66.8344, "sent_len_max_0": 128.0, "sent_len_max_1": 191.775, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.0555, "doc_norm": 1.5511, "encoder_q-embeddings": 1634.9182, "encoder_q-layer.0": 1153.3087, "encoder_q-layer.1": 1275.5577, "encoder_q-layer.10": 1316.1985, "encoder_q-layer.11": 2835.7131, "encoder_q-layer.2": 1475.9297, "encoder_q-layer.3": 1479.1663, "encoder_q-layer.4": 1516.6523, "encoder_q-layer.5": 1549.3977, "encoder_q-layer.6": 1714.344, "encoder_q-layer.7": 1712.923, "encoder_q-layer.8": 1539.5519, "encoder_q-layer.9": 1319.1194, "epoch": 0.77, "inbatch_neg_score": 0.563, "inbatch_pos_score": 1.25, "learning_rate": 1.1611111111111112e-05, "loss": 3.0555, "norm_diff": 0.1015, "norm_loss": 0.0, "num_token_doc": 66.691, "num_token_overlap": 15.7644, "num_token_query": 42.2133, "num_token_union": 68.3864, "num_word_context": 202.1196, "num_word_doc": 49.7858, "num_word_query": 31.8999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2455.4433, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5625, "query_norm": 1.4496, "queue_k_norm": 1.5444, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2133, "sent_len_1": 66.691, "sent_len_max_0": 127.9975, "sent_len_max_1": 192.165, "stdk": 0.049, "stdq": 0.0444, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0688, "doc_norm": 1.5452, "encoder_q-embeddings": 1651.2491, "encoder_q-layer.0": 1214.1265, "encoder_q-layer.1": 1236.822, "encoder_q-layer.10": 1197.0083, "encoder_q-layer.11": 2761.5691, "encoder_q-layer.2": 1458.2583, "encoder_q-layer.3": 1502.0625, "encoder_q-layer.4": 1632.2415, "encoder_q-layer.5": 1913.0251, "encoder_q-layer.6": 1698.6307, "encoder_q-layer.7": 1743.9817, "encoder_q-layer.8": 1543.3196, "encoder_q-layer.9": 1186.2644, "epoch": 0.77, "inbatch_neg_score": 0.556, "inbatch_pos_score": 1.252, "learning_rate": 1.1555555555555556e-05, "loss": 3.0688, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.706, "num_token_overlap": 15.8562, "num_token_query": 42.2139, "num_token_union": 68.3417, "num_word_context": 202.2973, "num_word_doc": 49.7728, "num_word_query": 31.8803, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2492.9043, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5566, "query_norm": 1.4661, "queue_k_norm": 1.5437, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2139, "sent_len_1": 66.706, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.7038, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.0796, "doc_norm": 1.5478, "encoder_q-embeddings": 1084.4912, "encoder_q-layer.0": 724.2222, "encoder_q-layer.1": 805.417, "encoder_q-layer.10": 1203.9847, "encoder_q-layer.11": 2758.8647, "encoder_q-layer.2": 887.3557, "encoder_q-layer.3": 961.2012, "encoder_q-layer.4": 1012.502, "encoder_q-layer.5": 1032.9673, "encoder_q-layer.6": 1106.2753, "encoder_q-layer.7": 1181.8119, "encoder_q-layer.8": 1280.385, "encoder_q-layer.9": 1149.6147, "epoch": 0.77, "inbatch_neg_score": 0.5599, "inbatch_pos_score": 1.2676, "learning_rate": 1.1500000000000002e-05, "loss": 3.0796, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 66.6889, "num_token_overlap": 15.8311, "num_token_query": 42.2511, "num_token_union": 68.4121, "num_word_context": 202.1529, "num_word_doc": 49.7868, "num_word_query": 31.9142, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1929.2994, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.4729, "queue_k_norm": 1.5442, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2511, "sent_len_1": 66.6889, "sent_len_max_0": 128.0, "sent_len_max_1": 188.11, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.073, "doc_norm": 1.543, "encoder_q-embeddings": 1064.7258, "encoder_q-layer.0": 691.6606, "encoder_q-layer.1": 759.799, "encoder_q-layer.10": 1192.4656, "encoder_q-layer.11": 2852.6597, "encoder_q-layer.2": 883.5724, "encoder_q-layer.3": 925.0262, "encoder_q-layer.4": 1032.563, "encoder_q-layer.5": 997.3765, "encoder_q-layer.6": 1127.0278, "encoder_q-layer.7": 1181.9039, "encoder_q-layer.8": 1282.2671, "encoder_q-layer.9": 1207.6963, "epoch": 0.78, "inbatch_neg_score": 0.5639, "inbatch_pos_score": 1.25, "learning_rate": 1.1444444444444446e-05, "loss": 3.073, "norm_diff": 0.0785, "norm_loss": 0.0, "num_token_doc": 66.5179, "num_token_overlap": 15.7836, "num_token_query": 42.2401, "num_token_union": 68.297, "num_word_context": 202.0677, "num_word_doc": 49.642, "num_word_query": 31.9035, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1971.1382, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5635, "query_norm": 1.4644, "queue_k_norm": 1.5446, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2401, "sent_len_1": 66.5179, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.0062, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0663, "doc_norm": 1.554, "encoder_q-embeddings": 1296.4644, "encoder_q-layer.0": 845.1272, "encoder_q-layer.1": 916.556, "encoder_q-layer.10": 1238.7946, "encoder_q-layer.11": 2831.2493, "encoder_q-layer.2": 1084.3173, "encoder_q-layer.3": 1234.6337, "encoder_q-layer.4": 1381.8759, "encoder_q-layer.5": 1404.2396, "encoder_q-layer.6": 1507.1394, "encoder_q-layer.7": 1490.0487, "encoder_q-layer.8": 1461.4203, "encoder_q-layer.9": 1198.629, "epoch": 0.78, "inbatch_neg_score": 0.5604, "inbatch_pos_score": 1.2607, "learning_rate": 1.138888888888889e-05, "loss": 3.0663, "norm_diff": 0.0867, "norm_loss": 0.0, "num_token_doc": 67.1017, "num_token_overlap": 15.8452, "num_token_query": 42.2469, "num_token_union": 68.5991, "num_word_context": 202.4935, "num_word_doc": 50.0838, "num_word_query": 31.9267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2200.4946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5601, "query_norm": 1.4673, "queue_k_norm": 1.5457, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2469, "sent_len_1": 67.1017, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.5538, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 3.0602, "doc_norm": 1.5443, "encoder_q-embeddings": 1316.1764, "encoder_q-layer.0": 875.66, "encoder_q-layer.1": 1012.1213, "encoder_q-layer.10": 1197.2313, "encoder_q-layer.11": 2758.4263, "encoder_q-layer.2": 1113.2888, "encoder_q-layer.3": 1213.1144, "encoder_q-layer.4": 1237.8439, "encoder_q-layer.5": 1252.2494, "encoder_q-layer.6": 1365.6815, "encoder_q-layer.7": 1420.8759, "encoder_q-layer.8": 1396.9857, "encoder_q-layer.9": 1183.5052, "epoch": 0.78, "inbatch_neg_score": 0.5639, "inbatch_pos_score": 1.2539, "learning_rate": 1.1333333333333334e-05, "loss": 3.0602, "norm_diff": 0.081, "norm_loss": 0.0, "num_token_doc": 66.7222, "num_token_overlap": 15.8193, "num_token_query": 42.2868, "num_token_union": 68.4296, "num_word_context": 201.9302, "num_word_doc": 49.7756, "num_word_query": 31.9211, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2118.338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5635, "query_norm": 1.4633, "queue_k_norm": 1.546, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2868, "sent_len_1": 66.7222, "sent_len_max_0": 127.9875, "sent_len_max_1": 189.28, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.0665, "doc_norm": 1.55, "encoder_q-embeddings": 497.4419, "encoder_q-layer.0": 319.6612, "encoder_q-layer.1": 337.572, "encoder_q-layer.10": 680.8714, "encoder_q-layer.11": 1539.9561, "encoder_q-layer.2": 381.1207, "encoder_q-layer.3": 393.1667, "encoder_q-layer.4": 405.6258, "encoder_q-layer.5": 424.3543, "encoder_q-layer.6": 476.5179, "encoder_q-layer.7": 573.5045, "encoder_q-layer.8": 684.9509, "encoder_q-layer.9": 636.5474, "epoch": 0.78, "inbatch_neg_score": 0.5598, "inbatch_pos_score": 1.2383, "learning_rate": 1.127777777777778e-05, "loss": 3.0665, "norm_diff": 0.0905, "norm_loss": 0.0, "num_token_doc": 66.837, "num_token_overlap": 15.8026, "num_token_query": 42.3544, "num_token_union": 68.4888, "num_word_context": 202.3678, "num_word_doc": 49.798, "num_word_query": 31.9734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 947.8618, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.4595, "queue_k_norm": 1.5459, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3544, "sent_len_1": 66.837, "sent_len_max_0": 127.9912, "sent_len_max_1": 192.0888, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0683, "doc_norm": 1.5486, "encoder_q-embeddings": 714.6067, "encoder_q-layer.0": 456.0534, "encoder_q-layer.1": 445.2906, "encoder_q-layer.10": 599.2075, "encoder_q-layer.11": 1435.0193, "encoder_q-layer.2": 479.3011, "encoder_q-layer.3": 437.4387, "encoder_q-layer.4": 454.2609, "encoder_q-layer.5": 467.7594, "encoder_q-layer.6": 525.1105, "encoder_q-layer.7": 584.0804, "encoder_q-layer.8": 652.918, "encoder_q-layer.9": 608.5227, "epoch": 0.78, "inbatch_neg_score": 0.5587, "inbatch_pos_score": 1.2598, "learning_rate": 1.1222222222222224e-05, "loss": 3.0683, "norm_diff": 0.0849, "norm_loss": 0.0, "num_token_doc": 66.9941, "num_token_overlap": 15.8548, "num_token_query": 42.2752, "num_token_union": 68.5285, "num_word_context": 202.4709, "num_word_doc": 49.979, "num_word_query": 31.9221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1009.6922, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5586, "query_norm": 1.4636, "queue_k_norm": 1.5457, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2752, "sent_len_1": 66.9941, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.0625, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 58.9844, "active_queue_size": 16384.0, "cl_loss": 3.0721, "doc_norm": 1.5436, "encoder_q-embeddings": 575.6414, "encoder_q-layer.0": 399.2907, "encoder_q-layer.1": 407.969, "encoder_q-layer.10": 598.2237, "encoder_q-layer.11": 1396.1229, "encoder_q-layer.2": 439.6299, "encoder_q-layer.3": 472.0051, "encoder_q-layer.4": 517.352, "encoder_q-layer.5": 510.7138, "encoder_q-layer.6": 552.7959, "encoder_q-layer.7": 586.901, "encoder_q-layer.8": 627.6997, "encoder_q-layer.9": 583.9071, "epoch": 0.78, "inbatch_neg_score": 0.5612, "inbatch_pos_score": 1.2715, "learning_rate": 1.1166666666666668e-05, "loss": 3.0721, "norm_diff": 0.0801, "norm_loss": 0.0, "num_token_doc": 66.7252, "num_token_overlap": 15.8403, "num_token_query": 42.3442, "num_token_union": 68.4301, "num_word_context": 202.3347, "num_word_doc": 49.8158, "num_word_query": 31.9902, "postclip_grad_norm": 1.0, "preclip_grad_norm": 970.9602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5615, "query_norm": 1.4635, "queue_k_norm": 1.5431, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3442, "sent_len_1": 66.7252, "sent_len_max_0": 127.9875, "sent_len_max_1": 187.1375, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0487, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0701, "doc_norm": 1.5517, "encoder_q-embeddings": 1028.382, "encoder_q-layer.0": 726.7609, "encoder_q-layer.1": 827.7856, "encoder_q-layer.10": 576.8801, "encoder_q-layer.11": 1431.5778, "encoder_q-layer.2": 939.2218, "encoder_q-layer.3": 1050.6066, "encoder_q-layer.4": 1039.0321, "encoder_q-layer.5": 1146.6351, "encoder_q-layer.6": 1101.467, "encoder_q-layer.7": 1194.1472, "encoder_q-layer.8": 945.9922, "encoder_q-layer.9": 642.522, "epoch": 0.78, "inbatch_neg_score": 0.566, "inbatch_pos_score": 1.2363, "learning_rate": 1.1111111111111112e-05, "loss": 3.0701, "norm_diff": 0.0978, "norm_loss": 0.0, "num_token_doc": 66.4435, "num_token_overlap": 15.8035, "num_token_query": 42.2329, "num_token_union": 68.2368, "num_word_context": 201.7155, "num_word_doc": 49.5685, "num_word_query": 31.8999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1508.4069, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5649, "query_norm": 1.4538, "queue_k_norm": 1.5466, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2329, "sent_len_1": 66.4435, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.3162, "stdk": 0.0489, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 26.9286, "dev_samples_per_second": 2.377, "dev_steps_per_second": 0.037, "epoch": 0.78, "step": 80000, "test_accuracy": 93.994140625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.35262423753738403, "test_doc_norm": 1.5324500799179077, "test_inbatch_neg_score": 0.9299855828285217, "test_inbatch_pos_score": 1.879270076751709, "test_loss": 0.35262423753738403, "test_loss_align": 1.0068016052246094, "test_loss_unif": 3.372405529022217, "test_loss_unif_q@queue": 3.372405767440796, "test_norm_diff": 0.015050247311592102, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5631538033485413, "test_query_norm": 1.5452686548233032, "test_queue_k_norm": 1.5466943979263306, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042886361479759216, "test_stdq": 0.04269346594810486, "test_stdqueue_k": 0.04884558171033859, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.9286, "dev_samples_per_second": 2.377, "dev_steps_per_second": 0.037, "epoch": 0.78, "eval_beir-arguana_ndcg@10": 0.37851, "eval_beir-arguana_recall@10": 0.63798, "eval_beir-arguana_recall@100": 0.93101, "eval_beir-arguana_recall@20": 0.77596, "eval_beir-avg_ndcg@10": 0.3769166666666666, "eval_beir-avg_recall@10": 0.44535658333333333, "eval_beir-avg_recall@100": 0.62846625, "eval_beir-avg_recall@20": 0.5063601666666667, "eval_beir-cqadupstack_ndcg@10": 0.2624366666666667, "eval_beir-cqadupstack_recall@10": 0.35687583333333334, "eval_beir-cqadupstack_recall@100": 0.5904725000000001, "eval_beir-cqadupstack_recall@20": 0.42359166666666664, "eval_beir-fiqa_ndcg@10": 0.24968, "eval_beir-fiqa_recall@10": 0.31709, "eval_beir-fiqa_recall@100": 0.57482, "eval_beir-fiqa_recall@20": 0.38188, "eval_beir-nfcorpus_ndcg@10": 0.29328, "eval_beir-nfcorpus_recall@10": 0.14602, "eval_beir-nfcorpus_recall@100": 0.28203, "eval_beir-nfcorpus_recall@20": 0.17675, "eval_beir-nq_ndcg@10": 0.27358, "eval_beir-nq_recall@10": 0.44276, "eval_beir-nq_recall@100": 0.79031, "eval_beir-nq_recall@20": 0.56627, "eval_beir-quora_ndcg@10": 0.76936, "eval_beir-quora_recall@10": 0.88166, "eval_beir-quora_recall@100": 0.97473, "eval_beir-quora_recall@20": 0.92446, "eval_beir-scidocs_ndcg@10": 0.15084, "eval_beir-scidocs_recall@10": 0.15763, "eval_beir-scidocs_recall@100": 0.36718, "eval_beir-scidocs_recall@20": 0.21468, "eval_beir-scifact_ndcg@10": 0.64564, "eval_beir-scifact_recall@10": 0.78678, "eval_beir-scifact_recall@100": 0.90822, "eval_beir-scifact_recall@20": 0.83411, "eval_beir-trec-covid_ndcg@10": 0.55509, "eval_beir-trec-covid_recall@10": 0.594, "eval_beir-trec-covid_recall@100": 0.4516, "eval_beir-trec-covid_recall@20": 0.559, "eval_beir-webis-touche2020_ndcg@10": 0.19075, "eval_beir-webis-touche2020_recall@10": 0.13277, "eval_beir-webis-touche2020_recall@100": 0.41429, "eval_beir-webis-touche2020_recall@20": 0.2069, "eval_senteval-avg_sts": 0.7517989185748324, "eval_senteval-sickr_spearman": 0.7189827928171652, "eval_senteval-stsb_spearman": 0.7846150443324995, "step": 80000, "test_accuracy": 93.994140625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.35262423753738403, "test_doc_norm": 1.5324500799179077, "test_inbatch_neg_score": 0.9299855828285217, "test_inbatch_pos_score": 1.879270076751709, "test_loss": 0.35262423753738403, "test_loss_align": 1.0068016052246094, "test_loss_unif": 3.372405529022217, "test_loss_unif_q@queue": 3.372405767440796, "test_norm_diff": 0.015050247311592102, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5631538033485413, "test_query_norm": 1.5452686548233032, "test_queue_k_norm": 1.5466943979263306, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042886361479759216, "test_stdq": 0.04269346594810486, "test_stdqueue_k": 0.04884558171033859, "test_stdqueue_q": 0.0 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0712, "doc_norm": 1.5457, "encoder_q-embeddings": 564.2468, "encoder_q-layer.0": 372.9569, "encoder_q-layer.1": 412.5004, "encoder_q-layer.10": 592.5394, "encoder_q-layer.11": 1494.0608, "encoder_q-layer.2": 469.9815, "encoder_q-layer.3": 462.6165, "encoder_q-layer.4": 494.6586, "encoder_q-layer.5": 515.598, "encoder_q-layer.6": 555.4413, "encoder_q-layer.7": 602.8849, "encoder_q-layer.8": 652.5576, "encoder_q-layer.9": 593.0275, "epoch": 0.78, "inbatch_neg_score": 0.5669, "inbatch_pos_score": 1.248, "learning_rate": 1.1055555555555556e-05, "loss": 3.0712, "norm_diff": 0.0747, "norm_loss": 0.0, "num_token_doc": 66.6906, "num_token_overlap": 15.7727, "num_token_query": 42.2416, "num_token_union": 68.4047, "num_word_context": 202.2125, "num_word_doc": 49.7605, "num_word_query": 31.9102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1016.9665, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5674, "query_norm": 1.471, "queue_k_norm": 1.5473, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2416, "sent_len_1": 66.6906, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5825, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.0675, "doc_norm": 1.5441, "encoder_q-embeddings": 931.3538, "encoder_q-layer.0": 652.1417, "encoder_q-layer.1": 682.5192, "encoder_q-layer.10": 675.0704, "encoder_q-layer.11": 1540.6841, "encoder_q-layer.2": 721.1187, "encoder_q-layer.3": 742.6218, "encoder_q-layer.4": 758.6345, "encoder_q-layer.5": 700.0625, "encoder_q-layer.6": 709.2102, "encoder_q-layer.7": 704.4491, "encoder_q-layer.8": 725.409, "encoder_q-layer.9": 636.2977, "epoch": 0.78, "inbatch_neg_score": 0.5682, "inbatch_pos_score": 1.2451, "learning_rate": 1.1000000000000001e-05, "loss": 3.0675, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 66.7326, "num_token_overlap": 15.7433, "num_token_query": 42.2827, "num_token_union": 68.4458, "num_word_context": 202.4241, "num_word_doc": 49.751, "num_word_query": 31.9151, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1251.356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5669, "query_norm": 1.4671, "queue_k_norm": 1.5471, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2827, "sent_len_1": 66.7326, "sent_len_max_0": 128.0, "sent_len_max_1": 192.085, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0432, "doc_norm": 1.5444, "encoder_q-embeddings": 613.7174, "encoder_q-layer.0": 425.2154, "encoder_q-layer.1": 481.2599, "encoder_q-layer.10": 647.6873, "encoder_q-layer.11": 1484.9058, "encoder_q-layer.2": 576.364, "encoder_q-layer.3": 604.4674, "encoder_q-layer.4": 657.8297, "encoder_q-layer.5": 657.3936, "encoder_q-layer.6": 682.8869, "encoder_q-layer.7": 710.2111, "encoder_q-layer.8": 758.7107, "encoder_q-layer.9": 645.9502, "epoch": 0.78, "inbatch_neg_score": 0.5707, "inbatch_pos_score": 1.248, "learning_rate": 1.0944444444444445e-05, "loss": 3.0432, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 67.0085, "num_token_overlap": 15.8753, "num_token_query": 42.459, "num_token_union": 68.6694, "num_word_context": 202.8098, "num_word_doc": 50.0135, "num_word_query": 32.0607, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1112.5405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5703, "query_norm": 1.4447, "queue_k_norm": 1.5467, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.459, "sent_len_1": 67.0085, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.5062, "stdk": 0.0486, "stdq": 0.0441, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0637, "doc_norm": 1.5469, "encoder_q-embeddings": 494.2813, "encoder_q-layer.0": 328.7633, "encoder_q-layer.1": 351.0805, "encoder_q-layer.10": 633.5509, "encoder_q-layer.11": 1451.9652, "encoder_q-layer.2": 392.3955, "encoder_q-layer.3": 414.4821, "encoder_q-layer.4": 450.1519, "encoder_q-layer.5": 463.1523, "encoder_q-layer.6": 513.5943, "encoder_q-layer.7": 557.3535, "encoder_q-layer.8": 646.5015, "encoder_q-layer.9": 594.7268, "epoch": 0.78, "inbatch_neg_score": 0.5695, "inbatch_pos_score": 1.2598, "learning_rate": 1.088888888888889e-05, "loss": 3.0637, "norm_diff": 0.0775, "norm_loss": 0.0, "num_token_doc": 66.5422, "num_token_overlap": 15.8536, "num_token_query": 42.3914, "num_token_union": 68.368, "num_word_context": 202.008, "num_word_doc": 49.6685, "num_word_query": 31.9966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 960.2765, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5703, "query_norm": 1.4694, "queue_k_norm": 1.5467, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3914, "sent_len_1": 66.5422, "sent_len_max_0": 128.0, "sent_len_max_1": 187.4663, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0671, "doc_norm": 1.5504, "encoder_q-embeddings": 767.8419, "encoder_q-layer.0": 534.5985, "encoder_q-layer.1": 604.9003, "encoder_q-layer.10": 626.4929, "encoder_q-layer.11": 1435.2968, "encoder_q-layer.2": 708.1078, "encoder_q-layer.3": 762.0109, "encoder_q-layer.4": 902.2593, "encoder_q-layer.5": 960.1638, "encoder_q-layer.6": 954.3758, "encoder_q-layer.7": 880.6651, "encoder_q-layer.8": 913.3549, "encoder_q-layer.9": 653.209, "epoch": 0.79, "inbatch_neg_score": 0.5694, "inbatch_pos_score": 1.2383, "learning_rate": 1.0833333333333334e-05, "loss": 3.0671, "norm_diff": 0.0868, "norm_loss": 0.0, "num_token_doc": 66.7058, "num_token_overlap": 15.8595, "num_token_query": 42.3479, "num_token_union": 68.4286, "num_word_context": 202.1692, "num_word_doc": 49.7768, "num_word_query": 32.0092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1271.2902, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5703, "query_norm": 1.4635, "queue_k_norm": 1.5459, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3479, "sent_len_1": 66.7058, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8125, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.061, "doc_norm": 1.5519, "encoder_q-embeddings": 706.366, "encoder_q-layer.0": 500.0931, "encoder_q-layer.1": 576.9195, "encoder_q-layer.10": 599.1637, "encoder_q-layer.11": 1394.0811, "encoder_q-layer.2": 659.5191, "encoder_q-layer.3": 678.9774, "encoder_q-layer.4": 718.2499, "encoder_q-layer.5": 726.0977, "encoder_q-layer.6": 794.5296, "encoder_q-layer.7": 811.5836, "encoder_q-layer.8": 748.4694, "encoder_q-layer.9": 601.7552, "epoch": 0.79, "inbatch_neg_score": 0.5701, "inbatch_pos_score": 1.2393, "learning_rate": 1.0777777777777778e-05, "loss": 3.061, "norm_diff": 0.0996, "norm_loss": 0.0, "num_token_doc": 66.7424, "num_token_overlap": 15.8795, "num_token_query": 42.4576, "num_token_union": 68.5044, "num_word_context": 202.6508, "num_word_doc": 49.8193, "num_word_query": 32.0912, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1135.5212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5688, "query_norm": 1.4522, "queue_k_norm": 1.5467, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4576, "sent_len_1": 66.7424, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4613, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0792, "doc_norm": 1.5472, "encoder_q-embeddings": 866.4362, "encoder_q-layer.0": 614.4229, "encoder_q-layer.1": 694.0098, "encoder_q-layer.10": 651.5653, "encoder_q-layer.11": 1527.8743, "encoder_q-layer.2": 821.0956, "encoder_q-layer.3": 837.8318, "encoder_q-layer.4": 978.1354, "encoder_q-layer.5": 928.3219, "encoder_q-layer.6": 930.4224, "encoder_q-layer.7": 874.4641, "encoder_q-layer.8": 798.6587, "encoder_q-layer.9": 644.9568, "epoch": 0.79, "inbatch_neg_score": 0.5675, "inbatch_pos_score": 1.2246, "learning_rate": 1.0722222222222222e-05, "loss": 3.0792, "norm_diff": 0.0885, "norm_loss": 0.0, "num_token_doc": 67.0171, "num_token_overlap": 15.8062, "num_token_query": 42.2562, "num_token_union": 68.6021, "num_word_context": 202.712, "num_word_doc": 49.9786, "num_word_query": 31.9077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1337.3114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5669, "query_norm": 1.4587, "queue_k_norm": 1.5484, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2562, "sent_len_1": 67.0171, "sent_len_max_0": 128.0, "sent_len_max_1": 188.61, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0626, "doc_norm": 1.5435, "encoder_q-embeddings": 540.1287, "encoder_q-layer.0": 357.9731, "encoder_q-layer.1": 397.4535, "encoder_q-layer.10": 614.9166, "encoder_q-layer.11": 1449.2833, "encoder_q-layer.2": 473.636, "encoder_q-layer.3": 513.1566, "encoder_q-layer.4": 511.0457, "encoder_q-layer.5": 505.8632, "encoder_q-layer.6": 552.3804, "encoder_q-layer.7": 570.3745, "encoder_q-layer.8": 661.6403, "encoder_q-layer.9": 587.8667, "epoch": 0.79, "inbatch_neg_score": 0.5666, "inbatch_pos_score": 1.2559, "learning_rate": 1.0666666666666667e-05, "loss": 3.0626, "norm_diff": 0.0792, "norm_loss": 0.0, "num_token_doc": 66.9446, "num_token_overlap": 15.8507, "num_token_query": 42.3848, "num_token_union": 68.6165, "num_word_context": 202.8256, "num_word_doc": 49.9179, "num_word_query": 32.0141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 981.9877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5669, "query_norm": 1.4643, "queue_k_norm": 1.547, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3848, "sent_len_1": 66.9446, "sent_len_max_0": 128.0, "sent_len_max_1": 190.03, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.0599, "doc_norm": 1.5517, "encoder_q-embeddings": 804.3992, "encoder_q-layer.0": 567.0931, "encoder_q-layer.1": 617.0494, "encoder_q-layer.10": 611.139, "encoder_q-layer.11": 1407.1353, "encoder_q-layer.2": 748.948, "encoder_q-layer.3": 759.2649, "encoder_q-layer.4": 912.9559, "encoder_q-layer.5": 949.9512, "encoder_q-layer.6": 1057.161, "encoder_q-layer.7": 938.9051, "encoder_q-layer.8": 771.6094, "encoder_q-layer.9": 599.2968, "epoch": 0.79, "inbatch_neg_score": 0.5715, "inbatch_pos_score": 1.2588, "learning_rate": 1.0611111111111111e-05, "loss": 3.0599, "norm_diff": 0.0807, "norm_loss": 0.0, "num_token_doc": 66.9187, "num_token_overlap": 15.8129, "num_token_query": 42.2156, "num_token_union": 68.5121, "num_word_context": 202.5296, "num_word_doc": 49.9571, "num_word_query": 31.8573, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1263.2843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5703, "query_norm": 1.4711, "queue_k_norm": 1.5479, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2156, "sent_len_1": 66.9187, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.5762, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.0766, "doc_norm": 1.5485, "encoder_q-embeddings": 523.1035, "encoder_q-layer.0": 359.2078, "encoder_q-layer.1": 382.7938, "encoder_q-layer.10": 690.4179, "encoder_q-layer.11": 1500.4662, "encoder_q-layer.2": 431.648, "encoder_q-layer.3": 452.8667, "encoder_q-layer.4": 486.5898, "encoder_q-layer.5": 522.0078, "encoder_q-layer.6": 536.1263, "encoder_q-layer.7": 602.5579, "encoder_q-layer.8": 694.2665, "encoder_q-layer.9": 601.0256, "epoch": 0.79, "inbatch_neg_score": 0.5698, "inbatch_pos_score": 1.249, "learning_rate": 1.0555555555555555e-05, "loss": 3.0766, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 66.5933, "num_token_overlap": 15.8295, "num_token_query": 42.3949, "num_token_union": 68.4332, "num_word_context": 202.2171, "num_word_doc": 49.6897, "num_word_query": 32.0243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1011.0375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5698, "query_norm": 1.4531, "queue_k_norm": 1.5472, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3949, "sent_len_1": 66.5933, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6725, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 3.0695, "doc_norm": 1.5409, "encoder_q-embeddings": 652.7145, "encoder_q-layer.0": 445.78, "encoder_q-layer.1": 494.9109, "encoder_q-layer.10": 577.6795, "encoder_q-layer.11": 1378.1284, "encoder_q-layer.2": 584.1971, "encoder_q-layer.3": 618.2861, "encoder_q-layer.4": 661.0308, "encoder_q-layer.5": 709.7695, "encoder_q-layer.6": 793.9888, "encoder_q-layer.7": 749.0255, "encoder_q-layer.8": 781.2198, "encoder_q-layer.9": 643.907, "epoch": 0.79, "inbatch_neg_score": 0.568, "inbatch_pos_score": 1.2607, "learning_rate": 1.05e-05, "loss": 3.0695, "norm_diff": 0.0819, "norm_loss": 0.0, "num_token_doc": 66.8583, "num_token_overlap": 15.789, "num_token_query": 42.2336, "num_token_union": 68.5051, "num_word_context": 202.2518, "num_word_doc": 49.8204, "num_word_query": 31.8673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1120.4492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5664, "query_norm": 1.459, "queue_k_norm": 1.5493, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2336, "sent_len_1": 66.8583, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.445, "stdk": 0.0484, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0471, "doc_norm": 1.5471, "encoder_q-embeddings": 1834.1687, "encoder_q-layer.0": 1385.5033, "encoder_q-layer.1": 1658.4301, "encoder_q-layer.10": 645.522, "encoder_q-layer.11": 1450.2852, "encoder_q-layer.2": 1672.7189, "encoder_q-layer.3": 1570.2704, "encoder_q-layer.4": 1787.6833, "encoder_q-layer.5": 1786.4744, "encoder_q-layer.6": 1820.5161, "encoder_q-layer.7": 1942.8265, "encoder_q-layer.8": 1450.5366, "encoder_q-layer.9": 943.174, "epoch": 0.79, "inbatch_neg_score": 0.5662, "inbatch_pos_score": 1.2344, "learning_rate": 1.0444444444444445e-05, "loss": 3.0471, "norm_diff": 0.0898, "norm_loss": 0.0, "num_token_doc": 66.8213, "num_token_overlap": 15.8719, "num_token_query": 42.4899, "num_token_union": 68.5252, "num_word_context": 202.6403, "num_word_doc": 49.8809, "num_word_query": 32.0882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2354.9272, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5659, "query_norm": 1.4572, "queue_k_norm": 1.5471, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4899, "sent_len_1": 66.8213, "sent_len_max_0": 127.995, "sent_len_max_1": 188.0875, "stdk": 0.0487, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0656, "doc_norm": 1.5454, "encoder_q-embeddings": 483.9734, "encoder_q-layer.0": 316.1265, "encoder_q-layer.1": 351.9899, "encoder_q-layer.10": 581.4396, "encoder_q-layer.11": 1375.1995, "encoder_q-layer.2": 403.8972, "encoder_q-layer.3": 429.6072, "encoder_q-layer.4": 458.0173, "encoder_q-layer.5": 483.7774, "encoder_q-layer.6": 526.5887, "encoder_q-layer.7": 596.7833, "encoder_q-layer.8": 653.1569, "encoder_q-layer.9": 551.6232, "epoch": 0.79, "inbatch_neg_score": 0.5682, "inbatch_pos_score": 1.2812, "learning_rate": 1.038888888888889e-05, "loss": 3.0656, "norm_diff": 0.0805, "norm_loss": 0.0, "num_token_doc": 66.6489, "num_token_overlap": 15.8825, "num_token_query": 42.3958, "num_token_union": 68.4343, "num_word_context": 202.0435, "num_word_doc": 49.7359, "num_word_query": 32.0241, "postclip_grad_norm": 1.0, "preclip_grad_norm": 930.2124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5688, "query_norm": 1.4649, "queue_k_norm": 1.5479, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3958, "sent_len_1": 66.6489, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.6675, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.069, "doc_norm": 1.5504, "encoder_q-embeddings": 828.0022, "encoder_q-layer.0": 592.7116, "encoder_q-layer.1": 643.4249, "encoder_q-layer.10": 632.1673, "encoder_q-layer.11": 1431.1602, "encoder_q-layer.2": 747.1048, "encoder_q-layer.3": 804.3525, "encoder_q-layer.4": 864.4583, "encoder_q-layer.5": 926.0612, "encoder_q-layer.6": 938.7539, "encoder_q-layer.7": 982.0412, "encoder_q-layer.8": 1074.1066, "encoder_q-layer.9": 768.1089, "epoch": 0.79, "inbatch_neg_score": 0.568, "inbatch_pos_score": 1.2461, "learning_rate": 1.0333333333333333e-05, "loss": 3.069, "norm_diff": 0.1006, "norm_loss": 0.0, "num_token_doc": 66.6003, "num_token_overlap": 15.8529, "num_token_query": 42.4496, "num_token_union": 68.4432, "num_word_context": 202.5066, "num_word_doc": 49.739, "num_word_query": 32.0778, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1321.9511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5684, "query_norm": 1.4498, "queue_k_norm": 1.5493, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4496, "sent_len_1": 66.6003, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7237, "stdk": 0.0488, "stdq": 0.0444, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0535, "doc_norm": 1.5422, "encoder_q-embeddings": 583.8874, "encoder_q-layer.0": 427.9828, "encoder_q-layer.1": 468.2702, "encoder_q-layer.10": 620.4233, "encoder_q-layer.11": 1422.335, "encoder_q-layer.2": 478.9917, "encoder_q-layer.3": 485.631, "encoder_q-layer.4": 503.7881, "encoder_q-layer.5": 518.4865, "encoder_q-layer.6": 535.9358, "encoder_q-layer.7": 600.7278, "encoder_q-layer.8": 661.1094, "encoder_q-layer.9": 584.2283, "epoch": 0.8, "inbatch_neg_score": 0.5704, "inbatch_pos_score": 1.25, "learning_rate": 1.0277777777777777e-05, "loss": 3.0535, "norm_diff": 0.0849, "norm_loss": 0.0, "num_token_doc": 66.9987, "num_token_overlap": 15.9774, "num_token_query": 42.6924, "num_token_union": 68.6653, "num_word_context": 202.4598, "num_word_doc": 49.9441, "num_word_query": 32.2567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 987.7404, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5688, "query_norm": 1.4573, "queue_k_norm": 1.5482, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.6924, "sent_len_1": 66.9987, "sent_len_max_0": 128.0, "sent_len_max_1": 190.905, "stdk": 0.0484, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0856, "doc_norm": 1.5459, "encoder_q-embeddings": 661.6245, "encoder_q-layer.0": 451.2811, "encoder_q-layer.1": 494.4655, "encoder_q-layer.10": 635.5004, "encoder_q-layer.11": 1523.0856, "encoder_q-layer.2": 555.6445, "encoder_q-layer.3": 523.9723, "encoder_q-layer.4": 578.0503, "encoder_q-layer.5": 596.7988, "encoder_q-layer.6": 636.6405, "encoder_q-layer.7": 689.0829, "encoder_q-layer.8": 749.2415, "encoder_q-layer.9": 594.9158, "epoch": 0.8, "inbatch_neg_score": 0.5723, "inbatch_pos_score": 1.2441, "learning_rate": 1.0222222222222223e-05, "loss": 3.0856, "norm_diff": 0.0928, "norm_loss": 0.0, "num_token_doc": 66.9407, "num_token_overlap": 15.8507, "num_token_query": 42.374, "num_token_union": 68.5854, "num_word_context": 202.843, "num_word_doc": 49.9823, "num_word_query": 31.982, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1104.4962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5723, "query_norm": 1.453, "queue_k_norm": 1.5477, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.374, "sent_len_1": 66.9407, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.9812, "stdk": 0.0486, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0513, "doc_norm": 1.55, "encoder_q-embeddings": 1750.4934, "encoder_q-layer.0": 1242.7433, "encoder_q-layer.1": 1415.9768, "encoder_q-layer.10": 1223.5931, "encoder_q-layer.11": 2716.4482, "encoder_q-layer.2": 1690.8716, "encoder_q-layer.3": 1774.2791, "encoder_q-layer.4": 1774.3625, "encoder_q-layer.5": 1932.5471, "encoder_q-layer.6": 1797.129, "encoder_q-layer.7": 1793.8505, "encoder_q-layer.8": 1688.0013, "encoder_q-layer.9": 1268.0074, "epoch": 0.8, "inbatch_neg_score": 0.5713, "inbatch_pos_score": 1.2637, "learning_rate": 1.0166666666666667e-05, "loss": 3.0513, "norm_diff": 0.085, "norm_loss": 0.0, "num_token_doc": 66.9138, "num_token_overlap": 15.8895, "num_token_query": 42.4565, "num_token_union": 68.5964, "num_word_context": 202.6067, "num_word_doc": 49.934, "num_word_query": 32.092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2609.718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5713, "query_norm": 1.465, "queue_k_norm": 1.5481, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4565, "sent_len_1": 66.9138, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.9837, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 56.8359, "active_queue_size": 16384.0, "cl_loss": 3.0422, "doc_norm": 1.5539, "encoder_q-embeddings": 1421.6726, "encoder_q-layer.0": 894.9545, "encoder_q-layer.1": 1030.9962, "encoder_q-layer.10": 1258.0698, "encoder_q-layer.11": 3061.136, "encoder_q-layer.2": 1215.2299, "encoder_q-layer.3": 1254.7008, "encoder_q-layer.4": 1385.9009, "encoder_q-layer.5": 1343.8439, "encoder_q-layer.6": 1361.8535, "encoder_q-layer.7": 1395.1592, "encoder_q-layer.8": 1609.4434, "encoder_q-layer.9": 1289.4591, "epoch": 0.8, "inbatch_neg_score": 0.5715, "inbatch_pos_score": 1.2764, "learning_rate": 1.0111111111111111e-05, "loss": 3.0422, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.8833, "num_token_overlap": 15.8934, "num_token_query": 42.4694, "num_token_union": 68.5607, "num_word_context": 202.1901, "num_word_doc": 49.8951, "num_word_query": 32.0877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2331.5828, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5723, "query_norm": 1.4715, "queue_k_norm": 1.5487, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4694, "sent_len_1": 66.8833, "sent_len_max_0": 127.995, "sent_len_max_1": 189.6887, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0713, "doc_norm": 1.5439, "encoder_q-embeddings": 1039.9412, "encoder_q-layer.0": 658.7425, "encoder_q-layer.1": 703.7333, "encoder_q-layer.10": 1173.1094, "encoder_q-layer.11": 2821.801, "encoder_q-layer.2": 780.3959, "encoder_q-layer.3": 817.9079, "encoder_q-layer.4": 873.7737, "encoder_q-layer.5": 896.3748, "encoder_q-layer.6": 980.8325, "encoder_q-layer.7": 1099.0779, "encoder_q-layer.8": 1241.0312, "encoder_q-layer.9": 1123.7987, "epoch": 0.8, "inbatch_neg_score": 0.5761, "inbatch_pos_score": 1.2578, "learning_rate": 1.0055555555555555e-05, "loss": 3.0713, "norm_diff": 0.0761, "norm_loss": 0.0, "num_token_doc": 66.8042, "num_token_overlap": 15.7509, "num_token_query": 42.1617, "num_token_union": 68.4734, "num_word_context": 202.4535, "num_word_doc": 49.86, "num_word_query": 31.8309, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1891.5775, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5742, "query_norm": 1.4679, "queue_k_norm": 1.5508, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1617, "sent_len_1": 66.8042, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.6012, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.0547, "doc_norm": 1.5499, "encoder_q-embeddings": 1046.5432, "encoder_q-layer.0": 682.0765, "encoder_q-layer.1": 731.4442, "encoder_q-layer.10": 1371.3702, "encoder_q-layer.11": 3011.0913, "encoder_q-layer.2": 819.507, "encoder_q-layer.3": 871.3273, "encoder_q-layer.4": 921.0275, "encoder_q-layer.5": 991.6278, "encoder_q-layer.6": 1100.6323, "encoder_q-layer.7": 1218.7781, "encoder_q-layer.8": 1409.7375, "encoder_q-layer.9": 1282.3033, "epoch": 0.8, "inbatch_neg_score": 0.5728, "inbatch_pos_score": 1.2559, "learning_rate": 1e-05, "loss": 3.0547, "norm_diff": 0.0753, "norm_loss": 0.0, "num_token_doc": 66.749, "num_token_overlap": 15.803, "num_token_query": 42.2494, "num_token_union": 68.4402, "num_word_context": 202.1193, "num_word_doc": 49.8235, "num_word_query": 31.9313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2002.9579, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5718, "query_norm": 1.4746, "queue_k_norm": 1.5505, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2494, "sent_len_1": 66.749, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2925, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0546, "doc_norm": 1.5473, "encoder_q-embeddings": 1049.0217, "encoder_q-layer.0": 719.0425, "encoder_q-layer.1": 776.3609, "encoder_q-layer.10": 1237.9374, "encoder_q-layer.11": 2934.4758, "encoder_q-layer.2": 889.9301, "encoder_q-layer.3": 902.813, "encoder_q-layer.4": 911.6831, "encoder_q-layer.5": 893.0111, "encoder_q-layer.6": 1042.0356, "encoder_q-layer.7": 1071.9418, "encoder_q-layer.8": 1256.3225, "encoder_q-layer.9": 1157.5721, "epoch": 0.8, "inbatch_neg_score": 0.5726, "inbatch_pos_score": 1.2559, "learning_rate": 9.944444444444445e-06, "loss": 3.0546, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 66.6763, "num_token_overlap": 15.8441, "num_token_query": 42.4876, "num_token_union": 68.5288, "num_word_context": 202.3716, "num_word_doc": 49.7769, "num_word_query": 32.1111, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1932.8114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5732, "query_norm": 1.4723, "queue_k_norm": 1.5477, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4876, "sent_len_1": 66.6763, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.8487, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0643, "doc_norm": 1.5516, "encoder_q-embeddings": 2117.332, "encoder_q-layer.0": 1410.7153, "encoder_q-layer.1": 1566.6787, "encoder_q-layer.10": 1392.9753, "encoder_q-layer.11": 2897.1636, "encoder_q-layer.2": 1844.3717, "encoder_q-layer.3": 1884.84, "encoder_q-layer.4": 1972.8826, "encoder_q-layer.5": 2288.311, "encoder_q-layer.6": 1976.0397, "encoder_q-layer.7": 1739.3712, "encoder_q-layer.8": 1620.2299, "encoder_q-layer.9": 1241.4719, "epoch": 0.8, "inbatch_neg_score": 0.5739, "inbatch_pos_score": 1.2568, "learning_rate": 9.888888888888889e-06, "loss": 3.0643, "norm_diff": 0.0799, "norm_loss": 0.0, "num_token_doc": 66.6833, "num_token_overlap": 15.9043, "num_token_query": 42.5702, "num_token_union": 68.5011, "num_word_context": 202.4871, "num_word_doc": 49.7436, "num_word_query": 32.1743, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2834.32, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5737, "query_norm": 1.4717, "queue_k_norm": 1.5494, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5702, "sent_len_1": 66.6833, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.2375, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0469, "doc_norm": 1.5517, "encoder_q-embeddings": 964.5175, "encoder_q-layer.0": 671.2654, "encoder_q-layer.1": 718.3314, "encoder_q-layer.10": 1356.1434, "encoder_q-layer.11": 2846.1221, "encoder_q-layer.2": 817.8693, "encoder_q-layer.3": 848.7775, "encoder_q-layer.4": 896.288, "encoder_q-layer.5": 909.0612, "encoder_q-layer.6": 1023.799, "encoder_q-layer.7": 1124.0791, "encoder_q-layer.8": 1326.6277, "encoder_q-layer.9": 1236.1373, "epoch": 0.8, "inbatch_neg_score": 0.5766, "inbatch_pos_score": 1.2656, "learning_rate": 9.833333333333333e-06, "loss": 3.0469, "norm_diff": 0.0972, "norm_loss": 0.0, "num_token_doc": 66.8398, "num_token_overlap": 15.8762, "num_token_query": 42.493, "num_token_union": 68.5773, "num_word_context": 202.5819, "num_word_doc": 49.8499, "num_word_query": 32.1091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1880.3567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5781, "query_norm": 1.4545, "queue_k_norm": 1.5488, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.493, "sent_len_1": 66.8398, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.6875, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 53.9062, "active_queue_size": 16384.0, "cl_loss": 3.0463, "doc_norm": 1.5529, "encoder_q-embeddings": 1340.6497, "encoder_q-layer.0": 903.0529, "encoder_q-layer.1": 1118.2845, "encoder_q-layer.10": 1304.8652, "encoder_q-layer.11": 3004.4412, "encoder_q-layer.2": 1331.063, "encoder_q-layer.3": 1488.3463, "encoder_q-layer.4": 1544.2891, "encoder_q-layer.5": 1572.0723, "encoder_q-layer.6": 1488.2734, "encoder_q-layer.7": 1563.6421, "encoder_q-layer.8": 1488.3297, "encoder_q-layer.9": 1195.0099, "epoch": 0.8, "inbatch_neg_score": 0.5778, "inbatch_pos_score": 1.2393, "learning_rate": 9.777777777777779e-06, "loss": 3.0463, "norm_diff": 0.1037, "norm_loss": 0.0, "num_token_doc": 66.9597, "num_token_overlap": 15.8442, "num_token_query": 42.2794, "num_token_union": 68.547, "num_word_context": 202.268, "num_word_doc": 49.965, "num_word_query": 31.9163, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2370.2689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5781, "query_norm": 1.4492, "queue_k_norm": 1.5487, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2794, "sent_len_1": 66.9597, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.37, "stdk": 0.0489, "stdq": 0.0442, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.0587, "doc_norm": 1.5471, "encoder_q-embeddings": 1150.2168, "encoder_q-layer.0": 807.3814, "encoder_q-layer.1": 863.2203, "encoder_q-layer.10": 1219.6068, "encoder_q-layer.11": 2921.2007, "encoder_q-layer.2": 1003.7221, "encoder_q-layer.3": 1009.7845, "encoder_q-layer.4": 1062.3296, "encoder_q-layer.5": 1086.5599, "encoder_q-layer.6": 1194.4409, "encoder_q-layer.7": 1270.4052, "encoder_q-layer.8": 1478.5762, "encoder_q-layer.9": 1244.1309, "epoch": 0.81, "inbatch_neg_score": 0.5826, "inbatch_pos_score": 1.2451, "learning_rate": 9.722222222222223e-06, "loss": 3.0587, "norm_diff": 0.0811, "norm_loss": 0.0, "num_token_doc": 66.3654, "num_token_overlap": 15.7791, "num_token_query": 42.3223, "num_token_union": 68.3035, "num_word_context": 202.0005, "num_word_doc": 49.5363, "num_word_query": 31.9807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2102.2632, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.582, "query_norm": 1.4661, "queue_k_norm": 1.5487, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3223, "sent_len_1": 66.3654, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.9087, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0634, "doc_norm": 1.5507, "encoder_q-embeddings": 1220.6255, "encoder_q-layer.0": 806.5104, "encoder_q-layer.1": 894.2315, "encoder_q-layer.10": 1315.8936, "encoder_q-layer.11": 2892.8713, "encoder_q-layer.2": 995.8584, "encoder_q-layer.3": 1046.5233, "encoder_q-layer.4": 1100.7534, "encoder_q-layer.5": 1148.2339, "encoder_q-layer.6": 1210.3153, "encoder_q-layer.7": 1335.5142, "encoder_q-layer.8": 1447.0283, "encoder_q-layer.9": 1257.7023, "epoch": 0.81, "inbatch_neg_score": 0.5829, "inbatch_pos_score": 1.2715, "learning_rate": 9.666666666666667e-06, "loss": 3.0634, "norm_diff": 0.0777, "norm_loss": 0.0, "num_token_doc": 66.9013, "num_token_overlap": 15.8026, "num_token_query": 42.2785, "num_token_union": 68.5802, "num_word_context": 202.5706, "num_word_doc": 49.921, "num_word_query": 31.9352, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2093.7701, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.582, "query_norm": 1.4729, "queue_k_norm": 1.549, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2785, "sent_len_1": 66.9013, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.97, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0558, "doc_norm": 1.5521, "encoder_q-embeddings": 2048.7251, "encoder_q-layer.0": 1458.0845, "encoder_q-layer.1": 1805.5762, "encoder_q-layer.10": 1258.6908, "encoder_q-layer.11": 2987.9302, "encoder_q-layer.2": 2122.5671, "encoder_q-layer.3": 2165.4194, "encoder_q-layer.4": 2471.2205, "encoder_q-layer.5": 2656.6333, "encoder_q-layer.6": 2951.3015, "encoder_q-layer.7": 2358.7517, "encoder_q-layer.8": 2106.4824, "encoder_q-layer.9": 1613.4642, "epoch": 0.81, "inbatch_neg_score": 0.5868, "inbatch_pos_score": 1.2803, "learning_rate": 9.61111111111111e-06, "loss": 3.0558, "norm_diff": 0.0655, "norm_loss": 0.0, "num_token_doc": 66.6809, "num_token_overlap": 15.8064, "num_token_query": 42.341, "num_token_union": 68.4277, "num_word_context": 202.1561, "num_word_doc": 49.7616, "num_word_query": 31.9919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3311.2052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5869, "query_norm": 1.4866, "queue_k_norm": 1.551, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.341, "sent_len_1": 66.6809, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.3487, "stdk": 0.0488, "stdq": 0.0456, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 59.7656, "active_queue_size": 16384.0, "cl_loss": 3.0536, "doc_norm": 1.5552, "encoder_q-embeddings": 2095.5659, "encoder_q-layer.0": 1336.4836, "encoder_q-layer.1": 1590.2544, "encoder_q-layer.10": 1144.3872, "encoder_q-layer.11": 2765.6799, "encoder_q-layer.2": 1790.616, "encoder_q-layer.3": 1999.5706, "encoder_q-layer.4": 2085.3196, "encoder_q-layer.5": 2260.3752, "encoder_q-layer.6": 2439.6091, "encoder_q-layer.7": 2217.0298, "encoder_q-layer.8": 1858.1464, "encoder_q-layer.9": 1298.2526, "epoch": 0.81, "inbatch_neg_score": 0.5821, "inbatch_pos_score": 1.2852, "learning_rate": 9.555555555555556e-06, "loss": 3.0536, "norm_diff": 0.0828, "norm_loss": 0.0, "num_token_doc": 66.9591, "num_token_overlap": 15.8981, "num_token_query": 42.5267, "num_token_union": 68.6431, "num_word_context": 202.2031, "num_word_doc": 49.9229, "num_word_query": 32.1195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2984.5732, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5811, "query_norm": 1.4724, "queue_k_norm": 1.5517, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5267, "sent_len_1": 66.9591, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6813, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0641, "doc_norm": 1.5525, "encoder_q-embeddings": 1148.4764, "encoder_q-layer.0": 761.4045, "encoder_q-layer.1": 843.3662, "encoder_q-layer.10": 1267.5061, "encoder_q-layer.11": 2986.23, "encoder_q-layer.2": 969.5349, "encoder_q-layer.3": 1005.6641, "encoder_q-layer.4": 1061.6409, "encoder_q-layer.5": 1070.5428, "encoder_q-layer.6": 1163.4252, "encoder_q-layer.7": 1383.5547, "encoder_q-layer.8": 1375.8414, "encoder_q-layer.9": 1182.9189, "epoch": 0.81, "inbatch_neg_score": 0.5786, "inbatch_pos_score": 1.2646, "learning_rate": 9.5e-06, "loss": 3.0641, "norm_diff": 0.0777, "norm_loss": 0.0, "num_token_doc": 66.787, "num_token_overlap": 15.7917, "num_token_query": 42.4398, "num_token_union": 68.5713, "num_word_context": 202.339, "num_word_doc": 49.8083, "num_word_query": 32.0748, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2073.7564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5806, "query_norm": 1.4748, "queue_k_norm": 1.5501, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4398, "sent_len_1": 66.787, "sent_len_max_0": 127.9912, "sent_len_max_1": 191.985, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 54.1016, "active_queue_size": 16384.0, "cl_loss": 3.0541, "doc_norm": 1.5524, "encoder_q-embeddings": 1798.3381, "encoder_q-layer.0": 1249.6681, "encoder_q-layer.1": 1388.064, "encoder_q-layer.10": 1255.0638, "encoder_q-layer.11": 2955.6426, "encoder_q-layer.2": 1598.5952, "encoder_q-layer.3": 1699.8899, "encoder_q-layer.4": 1802.823, "encoder_q-layer.5": 1843.0225, "encoder_q-layer.6": 2061.3096, "encoder_q-layer.7": 2166.0024, "encoder_q-layer.8": 2177.811, "encoder_q-layer.9": 1477.9717, "epoch": 0.81, "inbatch_neg_score": 0.5803, "inbatch_pos_score": 1.2568, "learning_rate": 9.444444444444445e-06, "loss": 3.0541, "norm_diff": 0.0917, "norm_loss": 0.0, "num_token_doc": 66.7773, "num_token_overlap": 15.8221, "num_token_query": 42.4442, "num_token_union": 68.5179, "num_word_context": 202.4566, "num_word_doc": 49.8431, "num_word_query": 32.0564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2803.3935, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5796, "query_norm": 1.4607, "queue_k_norm": 1.5506, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4442, "sent_len_1": 66.7773, "sent_len_max_0": 128.0, "sent_len_max_1": 192.05, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0699, "doc_norm": 1.546, "encoder_q-embeddings": 987.8787, "encoder_q-layer.0": 653.9666, "encoder_q-layer.1": 690.5718, "encoder_q-layer.10": 1237.53, "encoder_q-layer.11": 2887.1323, "encoder_q-layer.2": 788.8558, "encoder_q-layer.3": 824.145, "encoder_q-layer.4": 865.9609, "encoder_q-layer.5": 845.2955, "encoder_q-layer.6": 1002.2027, "encoder_q-layer.7": 1080.8241, "encoder_q-layer.8": 1266.9723, "encoder_q-layer.9": 1158.2131, "epoch": 0.81, "inbatch_neg_score": 0.5787, "inbatch_pos_score": 1.2637, "learning_rate": 9.388888888888889e-06, "loss": 3.0699, "norm_diff": 0.0767, "norm_loss": 0.0, "num_token_doc": 66.7784, "num_token_overlap": 15.8105, "num_token_query": 42.2687, "num_token_union": 68.4437, "num_word_context": 202.0038, "num_word_doc": 49.8178, "num_word_query": 31.9333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1885.9617, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5781, "query_norm": 1.4693, "queue_k_norm": 1.5508, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2687, "sent_len_1": 66.7784, "sent_len_max_0": 128.0, "sent_len_max_1": 190.535, "stdk": 0.0485, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0614, "doc_norm": 1.5599, "encoder_q-embeddings": 1943.4904, "encoder_q-layer.0": 1376.9286, "encoder_q-layer.1": 1566.5367, "encoder_q-layer.10": 1225.8331, "encoder_q-layer.11": 2913.3999, "encoder_q-layer.2": 1878.1896, "encoder_q-layer.3": 1889.2948, "encoder_q-layer.4": 1962.7903, "encoder_q-layer.5": 2126.0496, "encoder_q-layer.6": 1994.11, "encoder_q-layer.7": 2153.2466, "encoder_q-layer.8": 2031.1534, "encoder_q-layer.9": 1319.7145, "epoch": 0.81, "inbatch_neg_score": 0.5787, "inbatch_pos_score": 1.2891, "learning_rate": 9.333333333333334e-06, "loss": 3.0614, "norm_diff": 0.0853, "norm_loss": 0.0, "num_token_doc": 66.7465, "num_token_overlap": 15.8222, "num_token_query": 42.3171, "num_token_union": 68.434, "num_word_context": 201.9729, "num_word_doc": 49.7946, "num_word_query": 31.9781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2853.949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5796, "query_norm": 1.4747, "queue_k_norm": 1.5517, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3171, "sent_len_1": 66.7465, "sent_len_max_0": 128.0, "sent_len_max_1": 190.6662, "stdk": 0.0491, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0573, "doc_norm": 1.5471, "encoder_q-embeddings": 910.6381, "encoder_q-layer.0": 612.3162, "encoder_q-layer.1": 656.8461, "encoder_q-layer.10": 1166.9343, "encoder_q-layer.11": 2838.0488, "encoder_q-layer.2": 735.2928, "encoder_q-layer.3": 757.2431, "encoder_q-layer.4": 810.5373, "encoder_q-layer.5": 809.1967, "encoder_q-layer.6": 936.4918, "encoder_q-layer.7": 1065.0247, "encoder_q-layer.8": 1202.6388, "encoder_q-layer.9": 1131.8522, "epoch": 0.81, "inbatch_neg_score": 0.578, "inbatch_pos_score": 1.251, "learning_rate": 9.277777777777778e-06, "loss": 3.0573, "norm_diff": 0.0764, "norm_loss": 0.0, "num_token_doc": 66.8035, "num_token_overlap": 15.7746, "num_token_query": 42.1264, "num_token_union": 68.4249, "num_word_context": 202.0135, "num_word_doc": 49.847, "num_word_query": 31.8138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1821.5475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5806, "query_norm": 1.4707, "queue_k_norm": 1.5524, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1264, "sent_len_1": 66.8035, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.6438, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0527, "doc_norm": 1.5533, "encoder_q-embeddings": 1079.5143, "encoder_q-layer.0": 758.5351, "encoder_q-layer.1": 797.198, "encoder_q-layer.10": 1325.2476, "encoder_q-layer.11": 2812.4978, "encoder_q-layer.2": 931.4125, "encoder_q-layer.3": 944.6943, "encoder_q-layer.4": 984.554, "encoder_q-layer.5": 1012.6429, "encoder_q-layer.6": 1107.9268, "encoder_q-layer.7": 1212.9519, "encoder_q-layer.8": 1416.3151, "encoder_q-layer.9": 1223.8455, "epoch": 0.81, "inbatch_neg_score": 0.5777, "inbatch_pos_score": 1.2832, "learning_rate": 9.222222222222222e-06, "loss": 3.0527, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 67.0363, "num_token_overlap": 15.845, "num_token_query": 42.3583, "num_token_union": 68.6174, "num_word_context": 202.4025, "num_word_doc": 49.9932, "num_word_query": 32.0106, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1978.2607, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5786, "query_norm": 1.481, "queue_k_norm": 1.5529, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3583, "sent_len_1": 67.0363, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0588, "stdk": 0.0488, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0442, "doc_norm": 1.5478, "encoder_q-embeddings": 904.5253, "encoder_q-layer.0": 604.7374, "encoder_q-layer.1": 651.9614, "encoder_q-layer.10": 1172.9049, "encoder_q-layer.11": 2881.1584, "encoder_q-layer.2": 748.9652, "encoder_q-layer.3": 784.4545, "encoder_q-layer.4": 827.2214, "encoder_q-layer.5": 901.0499, "encoder_q-layer.6": 1114.45, "encoder_q-layer.7": 1215.8727, "encoder_q-layer.8": 1255.2534, "encoder_q-layer.9": 1152.1165, "epoch": 0.82, "inbatch_neg_score": 0.5807, "inbatch_pos_score": 1.2607, "learning_rate": 9.166666666666666e-06, "loss": 3.0442, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.6603, "num_token_overlap": 15.9099, "num_token_query": 42.4829, "num_token_union": 68.4316, "num_word_context": 201.8701, "num_word_doc": 49.7211, "num_word_query": 32.109, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1889.0121, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5791, "query_norm": 1.4654, "queue_k_norm": 1.5514, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4829, "sent_len_1": 66.6603, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.8738, "stdk": 0.0485, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0444, "doc_norm": 1.5601, "encoder_q-embeddings": 1166.2675, "encoder_q-layer.0": 792.5603, "encoder_q-layer.1": 840.2954, "encoder_q-layer.10": 1203.0326, "encoder_q-layer.11": 2857.3306, "encoder_q-layer.2": 931.2353, "encoder_q-layer.3": 981.1754, "encoder_q-layer.4": 1028.3767, "encoder_q-layer.5": 1079.9817, "encoder_q-layer.6": 1168.9491, "encoder_q-layer.7": 1245.1459, "encoder_q-layer.8": 1348.4868, "encoder_q-layer.9": 1189.9894, "epoch": 0.82, "inbatch_neg_score": 0.5791, "inbatch_pos_score": 1.2695, "learning_rate": 9.111111111111112e-06, "loss": 3.0444, "norm_diff": 0.0879, "norm_loss": 0.0, "num_token_doc": 66.7993, "num_token_overlap": 15.8627, "num_token_query": 42.4482, "num_token_union": 68.516, "num_word_context": 202.3553, "num_word_doc": 49.8294, "num_word_query": 32.0568, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2004.2414, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5806, "query_norm": 1.4723, "queue_k_norm": 1.5536, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4482, "sent_len_1": 66.7993, "sent_len_max_0": 128.0, "sent_len_max_1": 188.8575, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0409, "doc_norm": 1.5562, "encoder_q-embeddings": 2620.5996, "encoder_q-layer.0": 1808.4327, "encoder_q-layer.1": 1999.3145, "encoder_q-layer.10": 2402.9924, "encoder_q-layer.11": 5669.2559, "encoder_q-layer.2": 2358.0989, "encoder_q-layer.3": 2547.9407, "encoder_q-layer.4": 2576.7483, "encoder_q-layer.5": 2645.5146, "encoder_q-layer.6": 2708.5039, "encoder_q-layer.7": 2683.1196, "encoder_q-layer.8": 2714.4695, "encoder_q-layer.9": 2461.9792, "epoch": 0.82, "inbatch_neg_score": 0.5808, "inbatch_pos_score": 1.2832, "learning_rate": 9.055555555555556e-06, "loss": 3.0409, "norm_diff": 0.0833, "norm_loss": 0.0, "num_token_doc": 66.8698, "num_token_overlap": 15.8803, "num_token_query": 42.3816, "num_token_union": 68.5368, "num_word_context": 202.1336, "num_word_doc": 49.8545, "num_word_query": 32.0139, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4327.4069, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5796, "query_norm": 1.4729, "queue_k_norm": 1.5533, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3816, "sent_len_1": 66.8698, "sent_len_max_0": 128.0, "sent_len_max_1": 191.4725, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 3.0606, "doc_norm": 1.5524, "encoder_q-embeddings": 912.9957, "encoder_q-layer.0": 615.0917, "encoder_q-layer.1": 648.1661, "encoder_q-layer.10": 1244.2814, "encoder_q-layer.11": 2978.3945, "encoder_q-layer.2": 727.3837, "encoder_q-layer.3": 739.2941, "encoder_q-layer.4": 784.6182, "encoder_q-layer.5": 811.1004, "encoder_q-layer.6": 904.9057, "encoder_q-layer.7": 1046.9841, "encoder_q-layer.8": 1261.3724, "encoder_q-layer.9": 1176.9609, "epoch": 0.82, "inbatch_neg_score": 0.5837, "inbatch_pos_score": 1.2588, "learning_rate": 9e-06, "loss": 3.0606, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 66.7796, "num_token_overlap": 15.8633, "num_token_query": 42.3622, "num_token_union": 68.4934, "num_word_context": 202.6107, "num_word_doc": 49.8425, "num_word_query": 32.0022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1870.7895, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.582, "query_norm": 1.4572, "queue_k_norm": 1.5537, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3622, "sent_len_1": 66.7796, "sent_len_max_0": 127.9887, "sent_len_max_1": 189.2163, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0529, "doc_norm": 1.5526, "encoder_q-embeddings": 1079.5594, "encoder_q-layer.0": 733.0002, "encoder_q-layer.1": 804.0214, "encoder_q-layer.10": 1316.4648, "encoder_q-layer.11": 3099.6296, "encoder_q-layer.2": 921.0563, "encoder_q-layer.3": 926.3496, "encoder_q-layer.4": 974.4201, "encoder_q-layer.5": 1057.4189, "encoder_q-layer.6": 1154.8529, "encoder_q-layer.7": 1289.7185, "encoder_q-layer.8": 1539.772, "encoder_q-layer.9": 1302.6613, "epoch": 0.82, "inbatch_neg_score": 0.582, "inbatch_pos_score": 1.2617, "learning_rate": 8.944444444444444e-06, "loss": 3.0529, "norm_diff": 0.08, "norm_loss": 0.0, "num_token_doc": 66.6591, "num_token_overlap": 15.8325, "num_token_query": 42.3887, "num_token_union": 68.4427, "num_word_context": 202.1168, "num_word_doc": 49.7503, "num_word_query": 32.026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2071.1465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.584, "query_norm": 1.4726, "queue_k_norm": 1.5532, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3887, "sent_len_1": 66.6591, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.62, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0312, "doc_norm": 1.5502, "encoder_q-embeddings": 1096.7916, "encoder_q-layer.0": 720.9047, "encoder_q-layer.1": 773.009, "encoder_q-layer.10": 1246.0938, "encoder_q-layer.11": 2938.3416, "encoder_q-layer.2": 851.8741, "encoder_q-layer.3": 894.6412, "encoder_q-layer.4": 930.5788, "encoder_q-layer.5": 949.5664, "encoder_q-layer.6": 1043.0029, "encoder_q-layer.7": 1221.3438, "encoder_q-layer.8": 1277.0459, "encoder_q-layer.9": 1195.2369, "epoch": 0.82, "inbatch_neg_score": 0.5842, "inbatch_pos_score": 1.2588, "learning_rate": 8.88888888888889e-06, "loss": 3.0312, "norm_diff": 0.073, "norm_loss": 0.0, "num_token_doc": 67.0622, "num_token_overlap": 15.9093, "num_token_query": 42.4898, "num_token_union": 68.6465, "num_word_context": 202.8653, "num_word_doc": 50.0871, "num_word_query": 32.1294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1965.8932, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5854, "query_norm": 1.4772, "queue_k_norm": 1.5563, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4898, "sent_len_1": 67.0622, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1238, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0595, "doc_norm": 1.5576, "encoder_q-embeddings": 1002.978, "encoder_q-layer.0": 678.5585, "encoder_q-layer.1": 725.8526, "encoder_q-layer.10": 1173.0179, "encoder_q-layer.11": 2874.6824, "encoder_q-layer.2": 836.823, "encoder_q-layer.3": 874.0269, "encoder_q-layer.4": 911.7811, "encoder_q-layer.5": 919.2708, "encoder_q-layer.6": 1047.5685, "encoder_q-layer.7": 1152.5692, "encoder_q-layer.8": 1312.0958, "encoder_q-layer.9": 1168.2399, "epoch": 0.82, "inbatch_neg_score": 0.586, "inbatch_pos_score": 1.2676, "learning_rate": 8.833333333333334e-06, "loss": 3.0595, "norm_diff": 0.0872, "norm_loss": 0.0, "num_token_doc": 66.8494, "num_token_overlap": 15.8948, "num_token_query": 42.4498, "num_token_union": 68.5585, "num_word_context": 202.6341, "num_word_doc": 49.8594, "num_word_query": 32.0682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1925.7947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5859, "query_norm": 1.4703, "queue_k_norm": 1.5546, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4498, "sent_len_1": 66.8494, "sent_len_max_0": 128.0, "sent_len_max_1": 190.345, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.055, "doc_norm": 1.5581, "encoder_q-embeddings": 1936.999, "encoder_q-layer.0": 1309.0039, "encoder_q-layer.1": 1359.9512, "encoder_q-layer.10": 1194.8623, "encoder_q-layer.11": 2908.4277, "encoder_q-layer.2": 1538.0626, "encoder_q-layer.3": 1690.0154, "encoder_q-layer.4": 1933.3734, "encoder_q-layer.5": 1987.7158, "encoder_q-layer.6": 1950.4622, "encoder_q-layer.7": 1711.3824, "encoder_q-layer.8": 1595.3707, "encoder_q-layer.9": 1230.6429, "epoch": 0.82, "inbatch_neg_score": 0.5849, "inbatch_pos_score": 1.2744, "learning_rate": 8.777777777777778e-06, "loss": 3.055, "norm_diff": 0.0877, "norm_loss": 0.0, "num_token_doc": 66.8578, "num_token_overlap": 15.8844, "num_token_query": 42.4664, "num_token_union": 68.5738, "num_word_context": 202.328, "num_word_doc": 49.8618, "num_word_query": 32.0934, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2700.9765, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.584, "query_norm": 1.4704, "queue_k_norm": 1.5533, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4664, "sent_len_1": 66.8578, "sent_len_max_0": 128.0, "sent_len_max_1": 189.0325, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 55.4688, "active_queue_size": 16384.0, "cl_loss": 3.0754, "doc_norm": 1.5563, "encoder_q-embeddings": 1010.7017, "encoder_q-layer.0": 671.5614, "encoder_q-layer.1": 719.814, "encoder_q-layer.10": 1249.8077, "encoder_q-layer.11": 3028.0476, "encoder_q-layer.2": 833.8446, "encoder_q-layer.3": 881.6121, "encoder_q-layer.4": 948.3119, "encoder_q-layer.5": 1001.177, "encoder_q-layer.6": 1101.3611, "encoder_q-layer.7": 1222.7079, "encoder_q-layer.8": 1268.6039, "encoder_q-layer.9": 1158.936, "epoch": 0.82, "inbatch_neg_score": 0.5862, "inbatch_pos_score": 1.2725, "learning_rate": 8.722222222222224e-06, "loss": 3.0754, "norm_diff": 0.09, "norm_loss": 0.0, "num_token_doc": 66.6695, "num_token_overlap": 15.8416, "num_token_query": 42.2665, "num_token_union": 68.3552, "num_word_context": 202.3511, "num_word_doc": 49.7642, "num_word_query": 31.9035, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1979.8753, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5864, "query_norm": 1.4664, "queue_k_norm": 1.5547, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2665, "sent_len_1": 66.6695, "sent_len_max_0": 128.0, "sent_len_max_1": 190.1275, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.0773, "doc_norm": 1.5561, "encoder_q-embeddings": 2043.1129, "encoder_q-layer.0": 1335.2476, "encoder_q-layer.1": 1667.609, "encoder_q-layer.10": 1191.7281, "encoder_q-layer.11": 2903.002, "encoder_q-layer.2": 2252.9214, "encoder_q-layer.3": 2455.7046, "encoder_q-layer.4": 2518.3076, "encoder_q-layer.5": 2431.949, "encoder_q-layer.6": 2381.4551, "encoder_q-layer.7": 2361.1479, "encoder_q-layer.8": 2100.9348, "encoder_q-layer.9": 1417.3101, "epoch": 0.82, "inbatch_neg_score": 0.5895, "inbatch_pos_score": 1.2676, "learning_rate": 8.666666666666668e-06, "loss": 3.0773, "norm_diff": 0.0844, "norm_loss": 0.0, "num_token_doc": 66.6127, "num_token_overlap": 15.8159, "num_token_query": 42.3076, "num_token_union": 68.3553, "num_word_context": 202.2139, "num_word_doc": 49.6657, "num_word_query": 31.9495, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3167.9028, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5879, "query_norm": 1.4717, "queue_k_norm": 1.5535, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3076, "sent_len_1": 66.6127, "sent_len_max_0": 128.0, "sent_len_max_1": 190.9613, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.063, "doc_norm": 1.5528, "encoder_q-embeddings": 2181.9365, "encoder_q-layer.0": 1489.9563, "encoder_q-layer.1": 1785.8141, "encoder_q-layer.10": 1314.9803, "encoder_q-layer.11": 3045.1921, "encoder_q-layer.2": 2115.8242, "encoder_q-layer.3": 2399.4207, "encoder_q-layer.4": 2320.8037, "encoder_q-layer.5": 2361.4265, "encoder_q-layer.6": 2424.4426, "encoder_q-layer.7": 2264.4849, "encoder_q-layer.8": 2319.5942, "encoder_q-layer.9": 1576.4092, "epoch": 0.82, "inbatch_neg_score": 0.5887, "inbatch_pos_score": 1.2754, "learning_rate": 8.611111111111112e-06, "loss": 3.063, "norm_diff": 0.0743, "norm_loss": 0.0, "num_token_doc": 66.7831, "num_token_overlap": 15.7384, "num_token_query": 42.3027, "num_token_union": 68.5681, "num_word_context": 202.6478, "num_word_doc": 49.8055, "num_word_query": 31.9236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3260.4108, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5884, "query_norm": 1.4786, "queue_k_norm": 1.5539, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3027, "sent_len_1": 66.7831, "sent_len_max_0": 128.0, "sent_len_max_1": 189.6425, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.0593, "doc_norm": 1.5592, "encoder_q-embeddings": 1836.6432, "encoder_q-layer.0": 1452.2106, "encoder_q-layer.1": 1906.9001, "encoder_q-layer.10": 1185.4719, "encoder_q-layer.11": 2820.6599, "encoder_q-layer.2": 2505.1121, "encoder_q-layer.3": 2372.9106, "encoder_q-layer.4": 2777.3904, "encoder_q-layer.5": 2732.5369, "encoder_q-layer.6": 3094.8772, "encoder_q-layer.7": 3007.1477, "encoder_q-layer.8": 2321.7117, "encoder_q-layer.9": 1405.9337, "epoch": 0.83, "inbatch_neg_score": 0.5868, "inbatch_pos_score": 1.2871, "learning_rate": 8.555555555555556e-06, "loss": 3.0593, "norm_diff": 0.0869, "norm_loss": 0.0, "num_token_doc": 66.7143, "num_token_overlap": 15.7575, "num_token_query": 42.0437, "num_token_union": 68.304, "num_word_context": 201.8884, "num_word_doc": 49.7619, "num_word_query": 31.756, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3533.4084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5869, "query_norm": 1.4723, "queue_k_norm": 1.5538, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.0437, "sent_len_1": 66.7143, "sent_len_max_0": 127.9813, "sent_len_max_1": 189.7125, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0658, "doc_norm": 1.5514, "encoder_q-embeddings": 1007.7435, "encoder_q-layer.0": 673.7172, "encoder_q-layer.1": 739.6144, "encoder_q-layer.10": 1227.3021, "encoder_q-layer.11": 2967.2805, "encoder_q-layer.2": 824.6536, "encoder_q-layer.3": 855.9774, "encoder_q-layer.4": 900.129, "encoder_q-layer.5": 935.9704, "encoder_q-layer.6": 1003.3406, "encoder_q-layer.7": 1124.6152, "encoder_q-layer.8": 1294.7013, "encoder_q-layer.9": 1216.2863, "epoch": 0.83, "inbatch_neg_score": 0.5862, "inbatch_pos_score": 1.2627, "learning_rate": 8.500000000000002e-06, "loss": 3.0658, "norm_diff": 0.0816, "norm_loss": 0.0, "num_token_doc": 66.6322, "num_token_overlap": 15.7746, "num_token_query": 42.3677, "num_token_union": 68.4886, "num_word_context": 202.1075, "num_word_doc": 49.7441, "num_word_query": 31.9945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1949.5292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5854, "query_norm": 1.4698, "queue_k_norm": 1.553, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3677, "sent_len_1": 66.6322, "sent_len_max_0": 127.9938, "sent_len_max_1": 187.92, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 3.0814, "doc_norm": 1.5551, "encoder_q-embeddings": 1030.1169, "encoder_q-layer.0": 705.1492, "encoder_q-layer.1": 734.0303, "encoder_q-layer.10": 1323.8495, "encoder_q-layer.11": 2922.7129, "encoder_q-layer.2": 857.2394, "encoder_q-layer.3": 868.1238, "encoder_q-layer.4": 961.8745, "encoder_q-layer.5": 957.5854, "encoder_q-layer.6": 1130.3536, "encoder_q-layer.7": 1223.6008, "encoder_q-layer.8": 1379.5959, "encoder_q-layer.9": 1198.8691, "epoch": 0.83, "inbatch_neg_score": 0.5893, "inbatch_pos_score": 1.291, "learning_rate": 8.444444444444446e-06, "loss": 3.0814, "norm_diff": 0.0765, "norm_loss": 0.0, "num_token_doc": 66.7241, "num_token_overlap": 15.8012, "num_token_query": 42.409, "num_token_union": 68.5744, "num_word_context": 202.2052, "num_word_doc": 49.855, "num_word_query": 32.0319, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1976.2906, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5913, "query_norm": 1.4786, "queue_k_norm": 1.5539, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.409, "sent_len_1": 66.7241, "sent_len_max_0": 127.99, "sent_len_max_1": 187.1, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0609, "doc_norm": 1.5607, "encoder_q-embeddings": 1680.3901, "encoder_q-layer.0": 1150.4617, "encoder_q-layer.1": 1223.1691, "encoder_q-layer.10": 1213.4485, "encoder_q-layer.11": 2920.5444, "encoder_q-layer.2": 1492.868, "encoder_q-layer.3": 1479.2134, "encoder_q-layer.4": 1685.8094, "encoder_q-layer.5": 2026.251, "encoder_q-layer.6": 2063.4875, "encoder_q-layer.7": 2562.728, "encoder_q-layer.8": 1967.5865, "encoder_q-layer.9": 1183.0726, "epoch": 0.83, "inbatch_neg_score": 0.5868, "inbatch_pos_score": 1.2715, "learning_rate": 8.38888888888889e-06, "loss": 3.0609, "norm_diff": 0.0897, "norm_loss": 0.0, "num_token_doc": 66.9465, "num_token_overlap": 15.8207, "num_token_query": 42.3747, "num_token_union": 68.6597, "num_word_context": 202.5277, "num_word_doc": 49.939, "num_word_query": 32.008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2800.1728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5879, "query_norm": 1.4711, "queue_k_norm": 1.5543, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3747, "sent_len_1": 66.9465, "sent_len_max_0": 128.0, "sent_len_max_1": 190.275, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 55.0781, "active_queue_size": 16384.0, "cl_loss": 3.0555, "doc_norm": 1.5496, "encoder_q-embeddings": 2146.2114, "encoder_q-layer.0": 1407.0586, "encoder_q-layer.1": 1654.7738, "encoder_q-layer.10": 1221.788, "encoder_q-layer.11": 2843.6465, "encoder_q-layer.2": 2000.9037, "encoder_q-layer.3": 2199.5642, "encoder_q-layer.4": 2229.7437, "encoder_q-layer.5": 2224.1929, "encoder_q-layer.6": 1878.7062, "encoder_q-layer.7": 1991.3977, "encoder_q-layer.8": 1837.1699, "encoder_q-layer.9": 1299.1602, "epoch": 0.83, "inbatch_neg_score": 0.5901, "inbatch_pos_score": 1.2715, "learning_rate": 8.333333333333334e-06, "loss": 3.0555, "norm_diff": 0.0809, "norm_loss": 0.0, "num_token_doc": 66.8731, "num_token_overlap": 15.7402, "num_token_query": 42.1883, "num_token_union": 68.5408, "num_word_context": 202.6516, "num_word_doc": 49.8885, "num_word_query": 31.8657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3011.8263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5894, "query_norm": 1.4687, "queue_k_norm": 1.5545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1883, "sent_len_1": 66.8731, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.5462, "stdk": 0.0485, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 53.2227, "active_queue_size": 16384.0, "cl_loss": 3.0689, "doc_norm": 1.5545, "encoder_q-embeddings": 1375.251, "encoder_q-layer.0": 943.8357, "encoder_q-layer.1": 1071.7843, "encoder_q-layer.10": 1311.7075, "encoder_q-layer.11": 3056.6216, "encoder_q-layer.2": 1238.7344, "encoder_q-layer.3": 1319.575, "encoder_q-layer.4": 1378.8582, "encoder_q-layer.5": 1447.6467, "encoder_q-layer.6": 1504.6561, "encoder_q-layer.7": 1484.7725, "encoder_q-layer.8": 1420.3154, "encoder_q-layer.9": 1242.1589, "epoch": 0.83, "inbatch_neg_score": 0.5908, "inbatch_pos_score": 1.2617, "learning_rate": 8.27777777777778e-06, "loss": 3.0689, "norm_diff": 0.0986, "norm_loss": 0.0, "num_token_doc": 66.4498, "num_token_overlap": 15.8575, "num_token_query": 42.4221, "num_token_union": 68.291, "num_word_context": 201.9336, "num_word_doc": 49.5963, "num_word_query": 32.0359, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2318.3479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5889, "query_norm": 1.4559, "queue_k_norm": 1.555, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4221, "sent_len_1": 66.4498, "sent_len_max_0": 127.9875, "sent_len_max_1": 190.4038, "stdk": 0.0487, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0487, "doc_norm": 1.559, "encoder_q-embeddings": 990.738, "encoder_q-layer.0": 682.2546, "encoder_q-layer.1": 741.757, "encoder_q-layer.10": 1258.556, "encoder_q-layer.11": 2909.8076, "encoder_q-layer.2": 812.0009, "encoder_q-layer.3": 804.4584, "encoder_q-layer.4": 835.2368, "encoder_q-layer.5": 867.899, "encoder_q-layer.6": 941.0578, "encoder_q-layer.7": 1107.3971, "encoder_q-layer.8": 1195.9506, "encoder_q-layer.9": 1078.2262, "epoch": 0.83, "inbatch_neg_score": 0.5902, "inbatch_pos_score": 1.2705, "learning_rate": 8.222222222222223e-06, "loss": 3.0487, "norm_diff": 0.0942, "norm_loss": 0.0, "num_token_doc": 66.6385, "num_token_overlap": 15.8524, "num_token_query": 42.3517, "num_token_union": 68.3574, "num_word_context": 201.891, "num_word_doc": 49.764, "num_word_query": 32.0005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1864.2581, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5889, "query_norm": 1.4647, "queue_k_norm": 1.5552, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3517, "sent_len_1": 66.6385, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4325, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0527, "doc_norm": 1.5563, "encoder_q-embeddings": 979.9824, "encoder_q-layer.0": 619.1602, "encoder_q-layer.1": 661.8787, "encoder_q-layer.10": 1247.8046, "encoder_q-layer.11": 2896.1406, "encoder_q-layer.2": 740.3447, "encoder_q-layer.3": 798.1718, "encoder_q-layer.4": 837.9247, "encoder_q-layer.5": 862.5541, "encoder_q-layer.6": 999.3765, "encoder_q-layer.7": 1178.5627, "encoder_q-layer.8": 1297.0791, "encoder_q-layer.9": 1137.2876, "epoch": 0.83, "inbatch_neg_score": 0.5893, "inbatch_pos_score": 1.2705, "learning_rate": 8.166666666666668e-06, "loss": 3.0527, "norm_diff": 0.0908, "norm_loss": 0.0, "num_token_doc": 66.6326, "num_token_overlap": 15.8139, "num_token_query": 42.3629, "num_token_union": 68.4103, "num_word_context": 202.4742, "num_word_doc": 49.729, "num_word_query": 31.9784, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1897.4484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5903, "query_norm": 1.4655, "queue_k_norm": 1.5545, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3629, "sent_len_1": 66.6326, "sent_len_max_0": 128.0, "sent_len_max_1": 189.935, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.0762, "doc_norm": 1.546, "encoder_q-embeddings": 1357.8394, "encoder_q-layer.0": 930.2739, "encoder_q-layer.1": 1078.178, "encoder_q-layer.10": 1211.8389, "encoder_q-layer.11": 3028.9707, "encoder_q-layer.2": 1192.6227, "encoder_q-layer.3": 1314.2002, "encoder_q-layer.4": 1376.6852, "encoder_q-layer.5": 1465.3781, "encoder_q-layer.6": 1540.9207, "encoder_q-layer.7": 1484.2593, "encoder_q-layer.8": 1399.6763, "encoder_q-layer.9": 1187.7836, "epoch": 0.83, "inbatch_neg_score": 0.5925, "inbatch_pos_score": 1.252, "learning_rate": 8.111111111111112e-06, "loss": 3.0762, "norm_diff": 0.0832, "norm_loss": 0.0, "num_token_doc": 66.5432, "num_token_overlap": 15.7652, "num_token_query": 42.252, "num_token_union": 68.335, "num_word_context": 202.0508, "num_word_doc": 49.6432, "num_word_query": 31.9193, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2275.5503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5918, "query_norm": 1.4628, "queue_k_norm": 1.5555, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.252, "sent_len_1": 66.5432, "sent_len_max_0": 128.0, "sent_len_max_1": 188.9162, "stdk": 0.0484, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0592, "doc_norm": 1.5576, "encoder_q-embeddings": 3407.96, "encoder_q-layer.0": 2507.5808, "encoder_q-layer.1": 2577.2349, "encoder_q-layer.10": 1209.98, "encoder_q-layer.11": 2965.4563, "encoder_q-layer.2": 2831.3484, "encoder_q-layer.3": 3025.8921, "encoder_q-layer.4": 2841.9092, "encoder_q-layer.5": 2822.5359, "encoder_q-layer.6": 2882.7378, "encoder_q-layer.7": 2688.0232, "encoder_q-layer.8": 2172.9221, "encoder_q-layer.9": 1382.3218, "epoch": 0.83, "inbatch_neg_score": 0.5903, "inbatch_pos_score": 1.2822, "learning_rate": 8.055555555555557e-06, "loss": 3.0592, "norm_diff": 0.0946, "norm_loss": 0.0, "num_token_doc": 66.8173, "num_token_overlap": 15.863, "num_token_query": 42.5917, "num_token_union": 68.6307, "num_word_context": 202.3662, "num_word_doc": 49.8011, "num_word_query": 32.1927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4125.3348, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5908, "query_norm": 1.463, "queue_k_norm": 1.5559, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5917, "sent_len_1": 66.8173, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.4712, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.0387, "doc_norm": 1.5571, "encoder_q-embeddings": 1132.5406, "encoder_q-layer.0": 780.2081, "encoder_q-layer.1": 865.5889, "encoder_q-layer.10": 1350.6367, "encoder_q-layer.11": 2976.4062, "encoder_q-layer.2": 980.111, "encoder_q-layer.3": 958.8579, "encoder_q-layer.4": 982.6547, "encoder_q-layer.5": 917.8372, "encoder_q-layer.6": 1018.1916, "encoder_q-layer.7": 1161.9492, "encoder_q-layer.8": 1355.2029, "encoder_q-layer.9": 1214.3485, "epoch": 0.84, "inbatch_neg_score": 0.5917, "inbatch_pos_score": 1.2695, "learning_rate": 8.000000000000001e-06, "loss": 3.0387, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.8608, "num_token_overlap": 15.8591, "num_token_query": 42.385, "num_token_union": 68.5638, "num_word_context": 202.2949, "num_word_doc": 49.896, "num_word_query": 31.995, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2013.2529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5918, "query_norm": 1.4747, "queue_k_norm": 1.5554, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.385, "sent_len_1": 66.8608, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.9162, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 60.6445, "active_queue_size": 16384.0, "cl_loss": 3.0386, "doc_norm": 1.5581, "encoder_q-embeddings": 917.3827, "encoder_q-layer.0": 631.0566, "encoder_q-layer.1": 697.0438, "encoder_q-layer.10": 1270.4601, "encoder_q-layer.11": 2754.4224, "encoder_q-layer.2": 791.1799, "encoder_q-layer.3": 823.2691, "encoder_q-layer.4": 865.6398, "encoder_q-layer.5": 900.9398, "encoder_q-layer.6": 1012.9222, "encoder_q-layer.7": 1081.6422, "encoder_q-layer.8": 1243.0797, "encoder_q-layer.9": 1137.8053, "epoch": 0.84, "inbatch_neg_score": 0.5907, "inbatch_pos_score": 1.3057, "learning_rate": 7.944444444444445e-06, "loss": 3.0386, "norm_diff": 0.0755, "norm_loss": 0.0, "num_token_doc": 66.7048, "num_token_overlap": 15.7983, "num_token_query": 42.3206, "num_token_union": 68.4483, "num_word_context": 202.2438, "num_word_doc": 49.7918, "num_word_query": 31.9586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1850.9401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5918, "query_norm": 1.4826, "queue_k_norm": 1.5586, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3206, "sent_len_1": 66.7048, "sent_len_max_0": 128.0, "sent_len_max_1": 189.615, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.054, "doc_norm": 1.5487, "encoder_q-embeddings": 1139.2092, "encoder_q-layer.0": 754.4092, "encoder_q-layer.1": 846.1111, "encoder_q-layer.10": 1341.105, "encoder_q-layer.11": 3046.7097, "encoder_q-layer.2": 951.3027, "encoder_q-layer.3": 1017.2943, "encoder_q-layer.4": 1080.1935, "encoder_q-layer.5": 1099.0505, "encoder_q-layer.6": 1204.5803, "encoder_q-layer.7": 1251.4542, "encoder_q-layer.8": 1401.4501, "encoder_q-layer.9": 1182.2686, "epoch": 0.84, "inbatch_neg_score": 0.5954, "inbatch_pos_score": 1.2539, "learning_rate": 7.88888888888889e-06, "loss": 3.054, "norm_diff": 0.0837, "norm_loss": 0.0, "num_token_doc": 66.777, "num_token_overlap": 15.845, "num_token_query": 42.4211, "num_token_union": 68.522, "num_word_context": 202.3284, "num_word_doc": 49.8122, "num_word_query": 32.0466, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2072.5507, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5942, "query_norm": 1.4649, "queue_k_norm": 1.5564, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4211, "sent_len_1": 66.777, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.4325, "stdk": 0.0484, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.0589, "doc_norm": 1.5543, "encoder_q-embeddings": 954.9436, "encoder_q-layer.0": 627.4267, "encoder_q-layer.1": 680.3032, "encoder_q-layer.10": 1320.3298, "encoder_q-layer.11": 3042.2744, "encoder_q-layer.2": 764.493, "encoder_q-layer.3": 800.7586, "encoder_q-layer.4": 838.7794, "encoder_q-layer.5": 826.77, "encoder_q-layer.6": 980.6326, "encoder_q-layer.7": 1098.9987, "encoder_q-layer.8": 1306.8186, "encoder_q-layer.9": 1198.6562, "epoch": 0.84, "inbatch_neg_score": 0.5925, "inbatch_pos_score": 1.2617, "learning_rate": 7.833333333333333e-06, "loss": 3.0589, "norm_diff": 0.0885, "norm_loss": 0.0, "num_token_doc": 66.7944, "num_token_overlap": 15.8147, "num_token_query": 42.3739, "num_token_union": 68.546, "num_word_context": 202.5967, "num_word_doc": 49.8355, "num_word_query": 32.0166, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1941.511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.4657, "queue_k_norm": 1.5579, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3739, "sent_len_1": 66.7944, "sent_len_max_0": 127.9925, "sent_len_max_1": 188.955, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.0466, "doc_norm": 1.5576, "encoder_q-embeddings": 1191.2509, "encoder_q-layer.0": 777.0975, "encoder_q-layer.1": 790.114, "encoder_q-layer.10": 1381.7747, "encoder_q-layer.11": 3187.1914, "encoder_q-layer.2": 863.9895, "encoder_q-layer.3": 907.134, "encoder_q-layer.4": 931.6222, "encoder_q-layer.5": 953.3469, "encoder_q-layer.6": 1044.6343, "encoder_q-layer.7": 1157.3645, "encoder_q-layer.8": 1331.3569, "encoder_q-layer.9": 1284.2581, "epoch": 0.84, "inbatch_neg_score": 0.5962, "inbatch_pos_score": 1.2744, "learning_rate": 7.777777777777777e-06, "loss": 3.0466, "norm_diff": 0.0792, "norm_loss": 0.0, "num_token_doc": 66.9306, "num_token_overlap": 15.8811, "num_token_query": 42.3711, "num_token_union": 68.5821, "num_word_context": 202.4864, "num_word_doc": 49.9456, "num_word_query": 32.0024, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2070.4153, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5957, "query_norm": 1.4785, "queue_k_norm": 1.5589, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3711, "sent_len_1": 66.9306, "sent_len_max_0": 128.0, "sent_len_max_1": 189.755, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0489, "doc_norm": 1.5569, "encoder_q-embeddings": 2225.3928, "encoder_q-layer.0": 1516.1421, "encoder_q-layer.1": 1751.2203, "encoder_q-layer.10": 1408.7299, "encoder_q-layer.11": 3156.2686, "encoder_q-layer.2": 2027.7583, "encoder_q-layer.3": 2209.9324, "encoder_q-layer.4": 2300.0044, "encoder_q-layer.5": 2422.7517, "encoder_q-layer.6": 2348.2578, "encoder_q-layer.7": 2208.4124, "encoder_q-layer.8": 2168.8115, "encoder_q-layer.9": 1504.5397, "epoch": 0.84, "inbatch_neg_score": 0.5914, "inbatch_pos_score": 1.2617, "learning_rate": 7.722222222222223e-06, "loss": 3.0489, "norm_diff": 0.0949, "norm_loss": 0.0, "num_token_doc": 66.6168, "num_token_overlap": 15.8562, "num_token_query": 42.3906, "num_token_union": 68.3805, "num_word_context": 202.1222, "num_word_doc": 49.7204, "num_word_query": 32.0103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3226.5716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5918, "query_norm": 1.462, "queue_k_norm": 1.558, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3906, "sent_len_1": 66.6168, "sent_len_max_0": 128.0, "sent_len_max_1": 189.28, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0289, "doc_norm": 1.5604, "encoder_q-embeddings": 1111.6619, "encoder_q-layer.0": 761.7621, "encoder_q-layer.1": 879.3871, "encoder_q-layer.10": 1347.3503, "encoder_q-layer.11": 2930.7749, "encoder_q-layer.2": 937.3516, "encoder_q-layer.3": 983.3701, "encoder_q-layer.4": 1033.2031, "encoder_q-layer.5": 1046.5216, "encoder_q-layer.6": 1143.7477, "encoder_q-layer.7": 1211.4325, "encoder_q-layer.8": 1353.3571, "encoder_q-layer.9": 1206.1653, "epoch": 0.84, "inbatch_neg_score": 0.5946, "inbatch_pos_score": 1.2852, "learning_rate": 7.666666666666667e-06, "loss": 3.0289, "norm_diff": 0.0896, "norm_loss": 0.0, "num_token_doc": 66.88, "num_token_overlap": 15.9464, "num_token_query": 42.6042, "num_token_union": 68.5449, "num_word_context": 202.265, "num_word_doc": 49.8224, "num_word_query": 32.1708, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2042.7146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5933, "query_norm": 1.4708, "queue_k_norm": 1.5578, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.6042, "sent_len_1": 66.88, "sent_len_max_0": 127.9587, "sent_len_max_1": 191.3038, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0598, "doc_norm": 1.5554, "encoder_q-embeddings": 1063.1938, "encoder_q-layer.0": 731.6391, "encoder_q-layer.1": 770.9662, "encoder_q-layer.10": 1253.1331, "encoder_q-layer.11": 2889.3872, "encoder_q-layer.2": 892.2356, "encoder_q-layer.3": 939.8542, "encoder_q-layer.4": 963.7588, "encoder_q-layer.5": 1017.5822, "encoder_q-layer.6": 1139.8613, "encoder_q-layer.7": 1226.1953, "encoder_q-layer.8": 1379.175, "encoder_q-layer.9": 1170.4827, "epoch": 0.84, "inbatch_neg_score": 0.5955, "inbatch_pos_score": 1.29, "learning_rate": 7.611111111111112e-06, "loss": 3.0598, "norm_diff": 0.0763, "norm_loss": 0.0, "num_token_doc": 66.8285, "num_token_overlap": 15.8003, "num_token_query": 42.3089, "num_token_union": 68.5239, "num_word_context": 202.3206, "num_word_doc": 49.8962, "num_word_query": 31.9626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1987.9478, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5957, "query_norm": 1.4791, "queue_k_norm": 1.5576, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3089, "sent_len_1": 66.8285, "sent_len_max_0": 128.0, "sent_len_max_1": 188.0275, "stdk": 0.0487, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 54.9805, "active_queue_size": 16384.0, "cl_loss": 3.0651, "doc_norm": 1.5594, "encoder_q-embeddings": 957.9949, "encoder_q-layer.0": 652.2947, "encoder_q-layer.1": 709.53, "encoder_q-layer.10": 1226.928, "encoder_q-layer.11": 2960.9143, "encoder_q-layer.2": 798.2975, "encoder_q-layer.3": 830.0703, "encoder_q-layer.4": 869.5901, "encoder_q-layer.5": 927.5422, "encoder_q-layer.6": 998.0317, "encoder_q-layer.7": 1124.2419, "encoder_q-layer.8": 1274.142, "encoder_q-layer.9": 1165.2145, "epoch": 0.84, "inbatch_neg_score": 0.5929, "inbatch_pos_score": 1.2812, "learning_rate": 7.555555555555556e-06, "loss": 3.0651, "norm_diff": 0.0821, "norm_loss": 0.0, "num_token_doc": 66.5785, "num_token_overlap": 15.7586, "num_token_query": 42.215, "num_token_union": 68.3749, "num_word_context": 201.9112, "num_word_doc": 49.6994, "num_word_query": 31.9184, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1914.416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5938, "query_norm": 1.4773, "queue_k_norm": 1.5563, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.215, "sent_len_1": 66.5785, "sent_len_max_0": 127.995, "sent_len_max_1": 188.8, "stdk": 0.0489, "stdq": 0.0454, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0573, "doc_norm": 1.5585, "encoder_q-embeddings": 4162.0688, "encoder_q-layer.0": 2729.7615, "encoder_q-layer.1": 3320.8723, "encoder_q-layer.10": 1162.844, "encoder_q-layer.11": 2959.7798, "encoder_q-layer.2": 3822.2461, "encoder_q-layer.3": 4663.1797, "encoder_q-layer.4": 5219.8047, "encoder_q-layer.5": 6112.5127, "encoder_q-layer.6": 5118.4834, "encoder_q-layer.7": 3634.2314, "encoder_q-layer.8": 2236.3201, "encoder_q-layer.9": 1238.7047, "epoch": 0.84, "inbatch_neg_score": 0.5949, "inbatch_pos_score": 1.2783, "learning_rate": 7.5e-06, "loss": 3.0573, "norm_diff": 0.0858, "norm_loss": 0.0, "num_token_doc": 66.6721, "num_token_overlap": 15.8073, "num_token_query": 42.4513, "num_token_union": 68.4895, "num_word_context": 202.3857, "num_word_doc": 49.7169, "num_word_query": 32.0765, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5725.294, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5957, "query_norm": 1.4727, "queue_k_norm": 1.5584, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4513, "sent_len_1": 66.6721, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0175, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0492, "doc_norm": 1.557, "encoder_q-embeddings": 1215.0906, "encoder_q-layer.0": 805.1279, "encoder_q-layer.1": 869.5848, "encoder_q-layer.10": 1377.1941, "encoder_q-layer.11": 3006.8652, "encoder_q-layer.2": 1007.3212, "encoder_q-layer.3": 1087.2961, "encoder_q-layer.4": 1135.9137, "encoder_q-layer.5": 1143.9845, "encoder_q-layer.6": 1349.7924, "encoder_q-layer.7": 1358.1305, "encoder_q-layer.8": 1396.515, "encoder_q-layer.9": 1255.0479, "epoch": 0.85, "inbatch_neg_score": 0.5934, "inbatch_pos_score": 1.293, "learning_rate": 7.444444444444444e-06, "loss": 3.0492, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 66.6961, "num_token_overlap": 15.8326, "num_token_query": 42.5383, "num_token_union": 68.572, "num_word_context": 202.2068, "num_word_doc": 49.7803, "num_word_query": 32.1506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2172.9774, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5947, "query_norm": 1.48, "queue_k_norm": 1.5587, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5383, "sent_len_1": 66.6961, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.8663, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.0488, "doc_norm": 1.5563, "encoder_q-embeddings": 958.2989, "encoder_q-layer.0": 625.8842, "encoder_q-layer.1": 676.9034, "encoder_q-layer.10": 1257.7539, "encoder_q-layer.11": 2764.8784, "encoder_q-layer.2": 772.569, "encoder_q-layer.3": 800.5039, "encoder_q-layer.4": 848.9066, "encoder_q-layer.5": 901.2181, "encoder_q-layer.6": 986.5547, "encoder_q-layer.7": 1051.3254, "encoder_q-layer.8": 1232.5321, "encoder_q-layer.9": 1158.6896, "epoch": 0.85, "inbatch_neg_score": 0.5933, "inbatch_pos_score": 1.2959, "learning_rate": 7.38888888888889e-06, "loss": 3.0488, "norm_diff": 0.0757, "norm_loss": 0.0, "num_token_doc": 66.9528, "num_token_overlap": 15.931, "num_token_query": 42.4582, "num_token_union": 68.575, "num_word_context": 202.3064, "num_word_doc": 49.9723, "num_word_query": 32.0576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1833.1456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.4806, "queue_k_norm": 1.5588, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4582, "sent_len_1": 66.9528, "sent_len_max_0": 128.0, "sent_len_max_1": 191.6675, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 58.1055, "active_queue_size": 16384.0, "cl_loss": 3.0325, "doc_norm": 1.5602, "encoder_q-embeddings": 1104.7915, "encoder_q-layer.0": 711.311, "encoder_q-layer.1": 756.5724, "encoder_q-layer.10": 1197.1592, "encoder_q-layer.11": 2805.5791, "encoder_q-layer.2": 842.4346, "encoder_q-layer.3": 857.7578, "encoder_q-layer.4": 919.7986, "encoder_q-layer.5": 950.0887, "encoder_q-layer.6": 1071.3363, "encoder_q-layer.7": 1164.1052, "encoder_q-layer.8": 1274.2361, "encoder_q-layer.9": 1123.4333, "epoch": 0.85, "inbatch_neg_score": 0.5933, "inbatch_pos_score": 1.3008, "learning_rate": 7.333333333333334e-06, "loss": 3.0325, "norm_diff": 0.0937, "norm_loss": 0.0, "num_token_doc": 66.8966, "num_token_overlap": 15.9535, "num_token_query": 42.538, "num_token_union": 68.5546, "num_word_context": 202.4689, "num_word_doc": 49.8736, "num_word_query": 32.1278, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1932.4987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.4664, "queue_k_norm": 1.5579, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.538, "sent_len_1": 66.8966, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.5337, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.0327, "doc_norm": 1.5546, "encoder_q-embeddings": 1489.4508, "encoder_q-layer.0": 1054.5295, "encoder_q-layer.1": 1143.5139, "encoder_q-layer.10": 1430.0107, "encoder_q-layer.11": 3001.0535, "encoder_q-layer.2": 1346.472, "encoder_q-layer.3": 1396.2969, "encoder_q-layer.4": 1580.6791, "encoder_q-layer.5": 1667.9427, "encoder_q-layer.6": 1713.8258, "encoder_q-layer.7": 1772.8761, "encoder_q-layer.8": 1739.6538, "encoder_q-layer.9": 1335.625, "epoch": 0.85, "inbatch_neg_score": 0.5909, "inbatch_pos_score": 1.2637, "learning_rate": 7.277777777777778e-06, "loss": 3.0327, "norm_diff": 0.093, "norm_loss": 0.0, "num_token_doc": 66.5572, "num_token_overlap": 15.8767, "num_token_query": 42.3619, "num_token_union": 68.3692, "num_word_context": 201.8359, "num_word_doc": 49.6395, "num_word_query": 31.9975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2489.5171, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5918, "query_norm": 1.4617, "queue_k_norm": 1.5568, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3619, "sent_len_1": 66.5572, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.5987, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.0238, "doc_norm": 1.5604, "encoder_q-embeddings": 1100.8618, "encoder_q-layer.0": 779.155, "encoder_q-layer.1": 816.8602, "encoder_q-layer.10": 1254.8647, "encoder_q-layer.11": 3161.7742, "encoder_q-layer.2": 1008.3088, "encoder_q-layer.3": 993.8939, "encoder_q-layer.4": 995.4814, "encoder_q-layer.5": 916.7396, "encoder_q-layer.6": 1034.3213, "encoder_q-layer.7": 1135.6023, "encoder_q-layer.8": 1282.88, "encoder_q-layer.9": 1167.3185, "epoch": 0.85, "inbatch_neg_score": 0.5983, "inbatch_pos_score": 1.2666, "learning_rate": 7.222222222222222e-06, "loss": 3.0238, "norm_diff": 0.1058, "norm_loss": 0.0, "num_token_doc": 66.8117, "num_token_overlap": 15.8663, "num_token_query": 42.3773, "num_token_union": 68.4901, "num_word_context": 202.3463, "num_word_doc": 49.8475, "num_word_query": 32.041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2062.2163, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5972, "query_norm": 1.4546, "queue_k_norm": 1.5572, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3773, "sent_len_1": 66.8117, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0662, "stdk": 0.0489, "stdq": 0.0443, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.0507, "doc_norm": 1.5668, "encoder_q-embeddings": 4973.355, "encoder_q-layer.0": 3157.3503, "encoder_q-layer.1": 3845.5366, "encoder_q-layer.10": 1224.6426, "encoder_q-layer.11": 2842.7178, "encoder_q-layer.2": 4640.5972, "encoder_q-layer.3": 5322.1338, "encoder_q-layer.4": 5381.1934, "encoder_q-layer.5": 5641.5498, "encoder_q-layer.6": 6131.3726, "encoder_q-layer.7": 4297.5254, "encoder_q-layer.8": 2507.021, "encoder_q-layer.9": 1370.0132, "epoch": 0.85, "inbatch_neg_score": 0.595, "inbatch_pos_score": 1.293, "learning_rate": 7.166666666666667e-06, "loss": 3.0507, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 66.7659, "num_token_overlap": 15.8423, "num_token_query": 42.315, "num_token_union": 68.4945, "num_word_context": 202.1576, "num_word_doc": 49.8298, "num_word_query": 31.9751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6273.7446, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5957, "query_norm": 1.4714, "queue_k_norm": 1.5591, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.315, "sent_len_1": 66.7659, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.3787, "stdk": 0.0492, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0564, "doc_norm": 1.5603, "encoder_q-embeddings": 1612.1289, "encoder_q-layer.0": 1111.3066, "encoder_q-layer.1": 1241.7947, "encoder_q-layer.10": 1290.1342, "encoder_q-layer.11": 2974.3853, "encoder_q-layer.2": 1419.1747, "encoder_q-layer.3": 1517.2897, "encoder_q-layer.4": 1503.252, "encoder_q-layer.5": 1466.877, "encoder_q-layer.6": 1331.8472, "encoder_q-layer.7": 1332.3358, "encoder_q-layer.8": 1366.7537, "encoder_q-layer.9": 1233.1165, "epoch": 0.85, "inbatch_neg_score": 0.5947, "inbatch_pos_score": 1.2891, "learning_rate": 7.111111111111112e-06, "loss": 3.0564, "norm_diff": 0.09, "norm_loss": 0.0, "num_token_doc": 66.6214, "num_token_overlap": 15.7742, "num_token_query": 42.2974, "num_token_union": 68.4095, "num_word_context": 201.8994, "num_word_doc": 49.7126, "num_word_query": 31.9603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2400.6629, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5962, "query_norm": 1.4702, "queue_k_norm": 1.5589, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2974, "sent_len_1": 66.6214, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5925, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0661, "doc_norm": 1.5616, "encoder_q-embeddings": 4227.0444, "encoder_q-layer.0": 2926.9053, "encoder_q-layer.1": 3682.4873, "encoder_q-layer.10": 1382.8158, "encoder_q-layer.11": 2849.3545, "encoder_q-layer.2": 4655.6538, "encoder_q-layer.3": 5400.436, "encoder_q-layer.4": 5567.2852, "encoder_q-layer.5": 5991.8535, "encoder_q-layer.6": 5423.3115, "encoder_q-layer.7": 3998.6143, "encoder_q-layer.8": 3262.2266, "encoder_q-layer.9": 1659.7461, "epoch": 0.85, "inbatch_neg_score": 0.5958, "inbatch_pos_score": 1.292, "learning_rate": 7.055555555555556e-06, "loss": 3.0661, "norm_diff": 0.0758, "norm_loss": 0.0, "num_token_doc": 66.7425, "num_token_overlap": 15.7666, "num_token_query": 42.2413, "num_token_union": 68.441, "num_word_context": 202.1224, "num_word_doc": 49.8341, "num_word_query": 31.8873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6161.1726, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5957, "query_norm": 1.4858, "queue_k_norm": 1.5592, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2413, "sent_len_1": 66.7425, "sent_len_max_0": 127.99, "sent_len_max_1": 188.7025, "stdk": 0.0489, "stdq": 0.0457, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0644, "doc_norm": 1.5623, "encoder_q-embeddings": 957.6416, "encoder_q-layer.0": 634.1946, "encoder_q-layer.1": 649.9551, "encoder_q-layer.10": 1382.8132, "encoder_q-layer.11": 3081.9041, "encoder_q-layer.2": 740.4857, "encoder_q-layer.3": 777.2958, "encoder_q-layer.4": 812.4393, "encoder_q-layer.5": 854.9239, "encoder_q-layer.6": 993.5501, "encoder_q-layer.7": 1149.8251, "encoder_q-layer.8": 1345.2924, "encoder_q-layer.9": 1229.5107, "epoch": 0.85, "inbatch_neg_score": 0.5977, "inbatch_pos_score": 1.293, "learning_rate": 7.000000000000001e-06, "loss": 3.0644, "norm_diff": 0.0779, "norm_loss": 0.0, "num_token_doc": 66.7272, "num_token_overlap": 15.7547, "num_token_query": 42.1568, "num_token_union": 68.4392, "num_word_context": 202.2835, "num_word_doc": 49.8389, "num_word_query": 31.8666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1961.954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5967, "query_norm": 1.4843, "queue_k_norm": 1.5604, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1568, "sent_len_1": 66.7272, "sent_len_max_0": 128.0, "sent_len_max_1": 189.96, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.0542, "doc_norm": 1.5559, "encoder_q-embeddings": 1238.8796, "encoder_q-layer.0": 868.1738, "encoder_q-layer.1": 937.3142, "encoder_q-layer.10": 1431.3008, "encoder_q-layer.11": 2984.5889, "encoder_q-layer.2": 1081.0864, "encoder_q-layer.3": 1115.158, "encoder_q-layer.4": 1201.8662, "encoder_q-layer.5": 1156.0822, "encoder_q-layer.6": 1223.2987, "encoder_q-layer.7": 1317.177, "encoder_q-layer.8": 1375.5187, "encoder_q-layer.9": 1205.8208, "epoch": 0.85, "inbatch_neg_score": 0.5957, "inbatch_pos_score": 1.2559, "learning_rate": 6.944444444444445e-06, "loss": 3.0542, "norm_diff": 0.0855, "norm_loss": 0.0, "num_token_doc": 66.7155, "num_token_overlap": 15.8745, "num_token_query": 42.3662, "num_token_union": 68.4103, "num_word_context": 202.117, "num_word_doc": 49.7423, "num_word_query": 32.0078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2156.3897, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5942, "query_norm": 1.4703, "queue_k_norm": 1.5596, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3662, "sent_len_1": 66.7155, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.1712, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0489, "doc_norm": 1.5567, "encoder_q-embeddings": 945.3127, "encoder_q-layer.0": 636.3071, "encoder_q-layer.1": 663.5605, "encoder_q-layer.10": 1355.6595, "encoder_q-layer.11": 3039.2412, "encoder_q-layer.2": 752.093, "encoder_q-layer.3": 775.2713, "encoder_q-layer.4": 845.3603, "encoder_q-layer.5": 895.8604, "encoder_q-layer.6": 1049.7667, "encoder_q-layer.7": 1297.7642, "encoder_q-layer.8": 1462.021, "encoder_q-layer.9": 1336.0895, "epoch": 0.86, "inbatch_neg_score": 0.5962, "inbatch_pos_score": 1.2881, "learning_rate": 6.888888888888889e-06, "loss": 3.0489, "norm_diff": 0.0951, "norm_loss": 0.0, "num_token_doc": 66.6022, "num_token_overlap": 15.8572, "num_token_query": 42.4776, "num_token_union": 68.4918, "num_word_context": 202.1298, "num_word_doc": 49.713, "num_word_query": 32.0932, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1963.0892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5962, "query_norm": 1.4615, "queue_k_norm": 1.5581, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4776, "sent_len_1": 66.6022, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.545, "stdk": 0.0487, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.0484, "doc_norm": 1.5586, "encoder_q-embeddings": 1073.6991, "encoder_q-layer.0": 695.6863, "encoder_q-layer.1": 755.6633, "encoder_q-layer.10": 1338.9938, "encoder_q-layer.11": 3021.7219, "encoder_q-layer.2": 889.1053, "encoder_q-layer.3": 949.1608, "encoder_q-layer.4": 1038.5437, "encoder_q-layer.5": 1035.2335, "encoder_q-layer.6": 1151.9729, "encoder_q-layer.7": 1240.1224, "encoder_q-layer.8": 1339.6434, "encoder_q-layer.9": 1228.0042, "epoch": 0.86, "inbatch_neg_score": 0.5928, "inbatch_pos_score": 1.2656, "learning_rate": 6.833333333333333e-06, "loss": 3.0484, "norm_diff": 0.0879, "norm_loss": 0.0, "num_token_doc": 66.844, "num_token_overlap": 15.8468, "num_token_query": 42.3807, "num_token_union": 68.5641, "num_word_context": 202.355, "num_word_doc": 49.8908, "num_word_query": 32.0215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2017.3276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5928, "query_norm": 1.4707, "queue_k_norm": 1.5602, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3807, "sent_len_1": 66.844, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.4762, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0607, "doc_norm": 1.5591, "encoder_q-embeddings": 2486.2009, "encoder_q-layer.0": 1776.5229, "encoder_q-layer.1": 1857.6813, "encoder_q-layer.10": 2339.2324, "encoder_q-layer.11": 5710.6484, "encoder_q-layer.2": 2124.0381, "encoder_q-layer.3": 2197.9648, "encoder_q-layer.4": 2189.6162, "encoder_q-layer.5": 2336.3694, "encoder_q-layer.6": 2559.0679, "encoder_q-layer.7": 2771.8435, "encoder_q-layer.8": 3030.2422, "encoder_q-layer.9": 2451.0146, "epoch": 0.86, "inbatch_neg_score": 0.5944, "inbatch_pos_score": 1.2783, "learning_rate": 6.777777777777779e-06, "loss": 3.0607, "norm_diff": 0.0908, "norm_loss": 0.0, "num_token_doc": 66.861, "num_token_overlap": 15.8493, "num_token_query": 42.3577, "num_token_union": 68.5502, "num_word_context": 202.2597, "num_word_doc": 49.922, "num_word_query": 32.0028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4227.6147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5947, "query_norm": 1.4683, "queue_k_norm": 1.5591, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3577, "sent_len_1": 66.861, "sent_len_max_0": 128.0, "sent_len_max_1": 188.3638, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.0436, "doc_norm": 1.5569, "encoder_q-embeddings": 2175.9214, "encoder_q-layer.0": 1404.9264, "encoder_q-layer.1": 1481.5822, "encoder_q-layer.10": 2585.7148, "encoder_q-layer.11": 6119.1904, "encoder_q-layer.2": 1724.9539, "encoder_q-layer.3": 1827.5342, "encoder_q-layer.4": 1914.0764, "encoder_q-layer.5": 2025.7164, "encoder_q-layer.6": 2304.3721, "encoder_q-layer.7": 2647.4287, "encoder_q-layer.8": 2734.9446, "encoder_q-layer.9": 2499.2427, "epoch": 0.86, "inbatch_neg_score": 0.5884, "inbatch_pos_score": 1.2627, "learning_rate": 6.722222222222223e-06, "loss": 3.0436, "norm_diff": 0.0918, "norm_loss": 0.0, "num_token_doc": 66.5794, "num_token_overlap": 15.7999, "num_token_query": 42.3126, "num_token_union": 68.3617, "num_word_context": 201.7114, "num_word_doc": 49.6408, "num_word_query": 31.9267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4073.6195, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5898, "query_norm": 1.4651, "queue_k_norm": 1.5586, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3126, "sent_len_1": 66.5794, "sent_len_max_0": 128.0, "sent_len_max_1": 190.225, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 3.0345, "doc_norm": 1.5569, "encoder_q-embeddings": 1018.6307, "encoder_q-layer.0": 675.9102, "encoder_q-layer.1": 741.5455, "encoder_q-layer.10": 1322.9374, "encoder_q-layer.11": 2805.0747, "encoder_q-layer.2": 874.1031, "encoder_q-layer.3": 919.9823, "encoder_q-layer.4": 973.0348, "encoder_q-layer.5": 1046.5055, "encoder_q-layer.6": 1159.9417, "encoder_q-layer.7": 1201.9293, "encoder_q-layer.8": 1408.3589, "encoder_q-layer.9": 1177.8865, "epoch": 0.86, "inbatch_neg_score": 0.5925, "inbatch_pos_score": 1.3047, "learning_rate": 6.666666666666667e-06, "loss": 3.0345, "norm_diff": 0.0831, "norm_loss": 0.0, "num_token_doc": 66.5479, "num_token_overlap": 15.8746, "num_token_query": 42.4678, "num_token_union": 68.4069, "num_word_context": 202.0541, "num_word_doc": 49.655, "num_word_query": 32.0826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1959.3314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5933, "query_norm": 1.4738, "queue_k_norm": 1.5589, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4678, "sent_len_1": 66.5479, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6887, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 3.0238, "doc_norm": 1.558, "encoder_q-embeddings": 1440.5382, "encoder_q-layer.0": 939.2548, "encoder_q-layer.1": 1102.6675, "encoder_q-layer.10": 1274.7972, "encoder_q-layer.11": 2942.915, "encoder_q-layer.2": 1310.9642, "encoder_q-layer.3": 1517.333, "encoder_q-layer.4": 1512.5585, "encoder_q-layer.5": 1598.0894, "encoder_q-layer.6": 1761.6604, "encoder_q-layer.7": 1649.4609, "encoder_q-layer.8": 1709.5288, "encoder_q-layer.9": 1396.9938, "epoch": 0.86, "inbatch_neg_score": 0.5876, "inbatch_pos_score": 1.2959, "learning_rate": 6.611111111111111e-06, "loss": 3.0238, "norm_diff": 0.0848, "norm_loss": 0.0, "num_token_doc": 67.0771, "num_token_overlap": 15.9051, "num_token_query": 42.3953, "num_token_union": 68.622, "num_word_context": 202.6061, "num_word_doc": 50.0261, "num_word_query": 32.026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2430.6808, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5889, "query_norm": 1.4731, "queue_k_norm": 1.5601, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3953, "sent_len_1": 67.0771, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.8075, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0326, "doc_norm": 1.5621, "encoder_q-embeddings": 1074.7032, "encoder_q-layer.0": 718.0344, "encoder_q-layer.1": 762.1414, "encoder_q-layer.10": 1443.3414, "encoder_q-layer.11": 3155.1973, "encoder_q-layer.2": 832.2307, "encoder_q-layer.3": 904.2099, "encoder_q-layer.4": 946.1589, "encoder_q-layer.5": 1014.1533, "encoder_q-layer.6": 1137.5166, "encoder_q-layer.7": 1189.1293, "encoder_q-layer.8": 1354.0829, "encoder_q-layer.9": 1204.3826, "epoch": 0.86, "inbatch_neg_score": 0.5893, "inbatch_pos_score": 1.2754, "learning_rate": 6.555555555555556e-06, "loss": 3.0326, "norm_diff": 0.1043, "norm_loss": 0.0, "num_token_doc": 66.8578, "num_token_overlap": 15.8636, "num_token_query": 42.3156, "num_token_union": 68.5118, "num_word_context": 202.0761, "num_word_doc": 49.881, "num_word_query": 31.9583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2076.1547, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5889, "query_norm": 1.4578, "queue_k_norm": 1.5587, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3156, "sent_len_1": 66.8578, "sent_len_max_0": 127.995, "sent_len_max_1": 188.855, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.0402, "doc_norm": 1.5582, "encoder_q-embeddings": 968.5226, "encoder_q-layer.0": 653.4129, "encoder_q-layer.1": 696.4467, "encoder_q-layer.10": 1189.6147, "encoder_q-layer.11": 2813.0359, "encoder_q-layer.2": 802.5765, "encoder_q-layer.3": 834.6567, "encoder_q-layer.4": 904.9858, "encoder_q-layer.5": 932.9854, "encoder_q-layer.6": 1106.7135, "encoder_q-layer.7": 1247.22, "encoder_q-layer.8": 1343.6814, "encoder_q-layer.9": 1171.1744, "epoch": 0.86, "inbatch_neg_score": 0.5907, "inbatch_pos_score": 1.2861, "learning_rate": 6.5000000000000004e-06, "loss": 3.0402, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 67.0292, "num_token_overlap": 15.8546, "num_token_query": 42.3273, "num_token_union": 68.6183, "num_word_context": 202.5361, "num_word_doc": 49.9921, "num_word_query": 31.9836, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1908.9436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5908, "query_norm": 1.4716, "queue_k_norm": 1.5585, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3273, "sent_len_1": 67.0292, "sent_len_max_0": 127.975, "sent_len_max_1": 190.2125, "stdk": 0.0488, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.0519, "doc_norm": 1.5588, "encoder_q-embeddings": 4316.5796, "encoder_q-layer.0": 2730.356, "encoder_q-layer.1": 2905.7466, "encoder_q-layer.10": 1211.7085, "encoder_q-layer.11": 2948.6833, "encoder_q-layer.2": 3465.2363, "encoder_q-layer.3": 3778.0979, "encoder_q-layer.4": 4303.9385, "encoder_q-layer.5": 5260.7915, "encoder_q-layer.6": 5911.748, "encoder_q-layer.7": 5217.5537, "encoder_q-layer.8": 4464.2109, "encoder_q-layer.9": 2103.0671, "epoch": 0.86, "inbatch_neg_score": 0.5948, "inbatch_pos_score": 1.2949, "learning_rate": 6.4444444444444445e-06, "loss": 3.0519, "norm_diff": 0.0914, "norm_loss": 0.0, "num_token_doc": 66.7238, "num_token_overlap": 15.7937, "num_token_query": 42.4501, "num_token_union": 68.5919, "num_word_context": 202.1799, "num_word_doc": 49.7701, "num_word_query": 32.0484, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6106.0789, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5938, "query_norm": 1.4674, "queue_k_norm": 1.5607, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4501, "sent_len_1": 66.7238, "sent_len_max_0": 128.0, "sent_len_max_1": 190.215, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.0526, "doc_norm": 1.5607, "encoder_q-embeddings": 983.4827, "encoder_q-layer.0": 667.2614, "encoder_q-layer.1": 705.4744, "encoder_q-layer.10": 1370.2844, "encoder_q-layer.11": 3099.9624, "encoder_q-layer.2": 827.201, "encoder_q-layer.3": 855.1017, "encoder_q-layer.4": 962.7345, "encoder_q-layer.5": 929.8337, "encoder_q-layer.6": 1093.9049, "encoder_q-layer.7": 1195.4531, "encoder_q-layer.8": 1423.5459, "encoder_q-layer.9": 1230.8049, "epoch": 0.86, "inbatch_neg_score": 0.5901, "inbatch_pos_score": 1.251, "learning_rate": 6.3888888888888885e-06, "loss": 3.0526, "norm_diff": 0.1033, "norm_loss": 0.0, "num_token_doc": 66.8146, "num_token_overlap": 15.7615, "num_token_query": 42.1743, "num_token_union": 68.4423, "num_word_context": 202.2814, "num_word_doc": 49.8483, "num_word_query": 31.8613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2025.384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5903, "query_norm": 1.4574, "queue_k_norm": 1.5592, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1743, "sent_len_1": 66.8146, "sent_len_max_0": 128.0, "sent_len_max_1": 189.765, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0551, "doc_norm": 1.5546, "encoder_q-embeddings": 1527.8329, "encoder_q-layer.0": 1047.3928, "encoder_q-layer.1": 1105.6222, "encoder_q-layer.10": 1391.8292, "encoder_q-layer.11": 2839.8369, "encoder_q-layer.2": 1328.0281, "encoder_q-layer.3": 1366.4177, "encoder_q-layer.4": 1308.4718, "encoder_q-layer.5": 1227.1301, "encoder_q-layer.6": 1327.1896, "encoder_q-layer.7": 1435.6482, "encoder_q-layer.8": 1551.5979, "encoder_q-layer.9": 1223.8337, "epoch": 0.87, "inbatch_neg_score": 0.5918, "inbatch_pos_score": 1.2695, "learning_rate": 6.333333333333334e-06, "loss": 3.0551, "norm_diff": 0.0963, "norm_loss": 0.0, "num_token_doc": 67.0108, "num_token_overlap": 15.8141, "num_token_query": 42.177, "num_token_union": 68.4964, "num_word_context": 202.2015, "num_word_doc": 49.9652, "num_word_query": 31.8497, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2279.9441, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.4583, "queue_k_norm": 1.5591, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.177, "sent_len_1": 67.0108, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5813, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0504, "doc_norm": 1.5529, "encoder_q-embeddings": 965.3769, "encoder_q-layer.0": 635.6942, "encoder_q-layer.1": 665.8215, "encoder_q-layer.10": 1207.3572, "encoder_q-layer.11": 2788.0078, "encoder_q-layer.2": 745.9915, "encoder_q-layer.3": 778.4683, "encoder_q-layer.4": 816.0226, "encoder_q-layer.5": 843.5434, "encoder_q-layer.6": 991.0504, "encoder_q-layer.7": 1094.5038, "encoder_q-layer.8": 1220.4484, "encoder_q-layer.9": 1156.4622, "epoch": 0.87, "inbatch_neg_score": 0.5915, "inbatch_pos_score": 1.2773, "learning_rate": 6.277777777777778e-06, "loss": 3.0504, "norm_diff": 0.0955, "norm_loss": 0.0, "num_token_doc": 66.7886, "num_token_overlap": 15.8825, "num_token_query": 42.4152, "num_token_union": 68.4796, "num_word_context": 202.4772, "num_word_doc": 49.8415, "num_word_query": 32.0472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1838.0425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5918, "query_norm": 1.4574, "queue_k_norm": 1.5592, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4152, "sent_len_1": 66.7886, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.9162, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0406, "doc_norm": 1.5535, "encoder_q-embeddings": 1195.1277, "encoder_q-layer.0": 754.3021, "encoder_q-layer.1": 810.3364, "encoder_q-layer.10": 1203.061, "encoder_q-layer.11": 2884.0356, "encoder_q-layer.2": 935.4074, "encoder_q-layer.3": 969.8518, "encoder_q-layer.4": 1061.3981, "encoder_q-layer.5": 996.4443, "encoder_q-layer.6": 1056.9944, "encoder_q-layer.7": 1228.7792, "encoder_q-layer.8": 1353.053, "encoder_q-layer.9": 1214.7629, "epoch": 0.87, "inbatch_neg_score": 0.5905, "inbatch_pos_score": 1.2812, "learning_rate": 6.222222222222222e-06, "loss": 3.0406, "norm_diff": 0.0849, "norm_loss": 0.0, "num_token_doc": 66.738, "num_token_overlap": 15.8523, "num_token_query": 42.2682, "num_token_union": 68.3762, "num_word_context": 202.0323, "num_word_doc": 49.8049, "num_word_query": 31.9469, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2032.5709, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5908, "query_norm": 1.4686, "queue_k_norm": 1.5603, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2682, "sent_len_1": 66.738, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.5375, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0348, "doc_norm": 1.5649, "encoder_q-embeddings": 1227.4974, "encoder_q-layer.0": 821.3032, "encoder_q-layer.1": 954.3753, "encoder_q-layer.10": 1254.1654, "encoder_q-layer.11": 2899.3574, "encoder_q-layer.2": 1093.261, "encoder_q-layer.3": 1135.7834, "encoder_q-layer.4": 1210.8733, "encoder_q-layer.5": 1271.6737, "encoder_q-layer.6": 1442.8967, "encoder_q-layer.7": 1528.9857, "encoder_q-layer.8": 1489.6282, "encoder_q-layer.9": 1236.4817, "epoch": 0.87, "inbatch_neg_score": 0.5931, "inbatch_pos_score": 1.2871, "learning_rate": 6.166666666666667e-06, "loss": 3.0348, "norm_diff": 0.0949, "norm_loss": 0.0, "num_token_doc": 66.8275, "num_token_overlap": 15.8187, "num_token_query": 42.3298, "num_token_union": 68.491, "num_word_context": 201.9161, "num_word_doc": 49.8502, "num_word_query": 31.9728, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2184.917, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.47, "queue_k_norm": 1.5592, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3298, "sent_len_1": 66.8275, "sent_len_max_0": 127.9925, "sent_len_max_1": 190.9387, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0581, "doc_norm": 1.5544, "encoder_q-embeddings": 590.8307, "encoder_q-layer.0": 392.2331, "encoder_q-layer.1": 413.6125, "encoder_q-layer.10": 626.0483, "encoder_q-layer.11": 1585.2791, "encoder_q-layer.2": 485.9718, "encoder_q-layer.3": 500.4788, "encoder_q-layer.4": 530.583, "encoder_q-layer.5": 557.2761, "encoder_q-layer.6": 606.0632, "encoder_q-layer.7": 657.5483, "encoder_q-layer.8": 718.2737, "encoder_q-layer.9": 619.4126, "epoch": 0.87, "inbatch_neg_score": 0.5898, "inbatch_pos_score": 1.2871, "learning_rate": 6.111111111111111e-06, "loss": 3.0581, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.8273, "num_token_overlap": 15.8369, "num_token_query": 42.3379, "num_token_union": 68.498, "num_word_context": 202.35, "num_word_doc": 49.8425, "num_word_query": 31.9724, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1078.1713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5898, "query_norm": 1.4752, "queue_k_norm": 1.5605, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3379, "sent_len_1": 66.8273, "sent_len_max_0": 128.0, "sent_len_max_1": 189.945, "stdk": 0.0486, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 58.8867, "active_queue_size": 16384.0, "cl_loss": 3.0154, "doc_norm": 1.5634, "encoder_q-embeddings": 495.6987, "encoder_q-layer.0": 324.8434, "encoder_q-layer.1": 354.3095, "encoder_q-layer.10": 623.5865, "encoder_q-layer.11": 1485.2751, "encoder_q-layer.2": 410.0023, "encoder_q-layer.3": 426.1096, "encoder_q-layer.4": 448.5595, "encoder_q-layer.5": 470.9298, "encoder_q-layer.6": 548.9369, "encoder_q-layer.7": 598.7642, "encoder_q-layer.8": 664.9401, "encoder_q-layer.9": 602.2227, "epoch": 0.87, "inbatch_neg_score": 0.591, "inbatch_pos_score": 1.293, "learning_rate": 6.055555555555556e-06, "loss": 3.0154, "norm_diff": 0.0969, "norm_loss": 0.0, "num_token_doc": 66.9958, "num_token_overlap": 15.9305, "num_token_query": 42.6082, "num_token_union": 68.6933, "num_word_context": 202.5102, "num_word_doc": 49.9913, "num_word_query": 32.1834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 973.76, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5908, "query_norm": 1.4665, "queue_k_norm": 1.5605, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.6082, "sent_len_1": 66.9958, "sent_len_max_0": 127.995, "sent_len_max_1": 188.5987, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0451, "doc_norm": 1.5517, "encoder_q-embeddings": 621.0392, "encoder_q-layer.0": 426.8081, "encoder_q-layer.1": 452.4854, "encoder_q-layer.10": 662.3086, "encoder_q-layer.11": 1424.2738, "encoder_q-layer.2": 525.4892, "encoder_q-layer.3": 567.8163, "encoder_q-layer.4": 594.9758, "encoder_q-layer.5": 602.6646, "encoder_q-layer.6": 617.0475, "encoder_q-layer.7": 611.7865, "encoder_q-layer.8": 692.5825, "encoder_q-layer.9": 598.2377, "epoch": 0.87, "inbatch_neg_score": 0.5878, "inbatch_pos_score": 1.2686, "learning_rate": 6e-06, "loss": 3.0451, "norm_diff": 0.0794, "norm_loss": 0.0, "num_token_doc": 66.9636, "num_token_overlap": 15.872, "num_token_query": 42.3178, "num_token_union": 68.5189, "num_word_context": 202.2135, "num_word_doc": 49.9318, "num_word_query": 31.9612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1050.7412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5898, "query_norm": 1.4723, "queue_k_norm": 1.5599, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3178, "sent_len_1": 66.9636, "sent_len_max_0": 127.9938, "sent_len_max_1": 191.185, "stdk": 0.0486, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 54.2969, "active_queue_size": 16384.0, "cl_loss": 3.0323, "doc_norm": 1.5575, "encoder_q-embeddings": 476.4658, "encoder_q-layer.0": 311.0892, "encoder_q-layer.1": 326.882, "encoder_q-layer.10": 594.7507, "encoder_q-layer.11": 1459.2911, "encoder_q-layer.2": 351.0733, "encoder_q-layer.3": 369.3513, "encoder_q-layer.4": 382.689, "encoder_q-layer.5": 395.6398, "encoder_q-layer.6": 460.3376, "encoder_q-layer.7": 510.7701, "encoder_q-layer.8": 586.6694, "encoder_q-layer.9": 573.3951, "epoch": 0.87, "inbatch_neg_score": 0.5903, "inbatch_pos_score": 1.252, "learning_rate": 5.944444444444445e-06, "loss": 3.0323, "norm_diff": 0.0965, "norm_loss": 0.0, "num_token_doc": 66.6281, "num_token_overlap": 15.8021, "num_token_query": 42.4078, "num_token_union": 68.4596, "num_word_context": 202.2702, "num_word_doc": 49.7009, "num_word_query": 32.0257, "postclip_grad_norm": 1.0, "preclip_grad_norm": 925.2068, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5908, "query_norm": 1.461, "queue_k_norm": 1.5579, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4078, "sent_len_1": 66.6281, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3262, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0519, "doc_norm": 1.5612, "encoder_q-embeddings": 564.0303, "encoder_q-layer.0": 358.496, "encoder_q-layer.1": 372.7611, "encoder_q-layer.10": 680.6704, "encoder_q-layer.11": 1591.2518, "encoder_q-layer.2": 427.5557, "encoder_q-layer.3": 425.9535, "encoder_q-layer.4": 445.4851, "encoder_q-layer.5": 466.4937, "encoder_q-layer.6": 510.2901, "encoder_q-layer.7": 561.2578, "encoder_q-layer.8": 661.7003, "encoder_q-layer.9": 609.8612, "epoch": 0.87, "inbatch_neg_score": 0.5895, "inbatch_pos_score": 1.2852, "learning_rate": 5.888888888888889e-06, "loss": 3.0519, "norm_diff": 0.086, "norm_loss": 0.0, "num_token_doc": 67.0534, "num_token_overlap": 15.8276, "num_token_query": 42.3824, "num_token_union": 68.6916, "num_word_context": 202.7093, "num_word_doc": 50.0282, "num_word_query": 31.9949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1042.2192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5894, "query_norm": 1.4752, "queue_k_norm": 1.5601, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3824, "sent_len_1": 67.0534, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.1463, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0363, "doc_norm": 1.5569, "encoder_q-embeddings": 655.8785, "encoder_q-layer.0": 436.4369, "encoder_q-layer.1": 469.3894, "encoder_q-layer.10": 655.6862, "encoder_q-layer.11": 1493.3635, "encoder_q-layer.2": 536.8701, "encoder_q-layer.3": 559.8041, "encoder_q-layer.4": 597.89, "encoder_q-layer.5": 606.7281, "encoder_q-layer.6": 630.3891, "encoder_q-layer.7": 664.4238, "encoder_q-layer.8": 723.8486, "encoder_q-layer.9": 607.4247, "epoch": 0.87, "inbatch_neg_score": 0.5921, "inbatch_pos_score": 1.2764, "learning_rate": 5.833333333333334e-06, "loss": 3.0363, "norm_diff": 0.0846, "norm_loss": 0.0, "num_token_doc": 66.7762, "num_token_overlap": 15.8535, "num_token_query": 42.3462, "num_token_union": 68.4536, "num_word_context": 201.9721, "num_word_doc": 49.7934, "num_word_query": 31.9877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1095.314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.4722, "queue_k_norm": 1.558, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3462, "sent_len_1": 66.7762, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.2225, "stdk": 0.0487, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0401, "doc_norm": 1.5607, "encoder_q-embeddings": 700.9909, "encoder_q-layer.0": 479.4182, "encoder_q-layer.1": 529.2414, "encoder_q-layer.10": 704.6282, "encoder_q-layer.11": 1506.4347, "encoder_q-layer.2": 588.5489, "encoder_q-layer.3": 654.8428, "encoder_q-layer.4": 661.3395, "encoder_q-layer.5": 690.6197, "encoder_q-layer.6": 734.903, "encoder_q-layer.7": 713.0567, "encoder_q-layer.8": 729.1787, "encoder_q-layer.9": 591.5629, "epoch": 0.87, "inbatch_neg_score": 0.5986, "inbatch_pos_score": 1.2959, "learning_rate": 5.777777777777778e-06, "loss": 3.0401, "norm_diff": 0.0922, "norm_loss": 0.0, "num_token_doc": 66.8394, "num_token_overlap": 15.8258, "num_token_query": 42.459, "num_token_union": 68.5821, "num_word_context": 202.3449, "num_word_doc": 49.8426, "num_word_query": 32.0937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1147.38, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5981, "query_norm": 1.4684, "queue_k_norm": 1.5605, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.459, "sent_len_1": 66.8394, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.17, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0601, "doc_norm": 1.558, "encoder_q-embeddings": 826.5129, "encoder_q-layer.0": 542.6211, "encoder_q-layer.1": 628.4573, "encoder_q-layer.10": 556.7861, "encoder_q-layer.11": 1380.4623, "encoder_q-layer.2": 746.5717, "encoder_q-layer.3": 788.3787, "encoder_q-layer.4": 770.6624, "encoder_q-layer.5": 849.2787, "encoder_q-layer.6": 837.7883, "encoder_q-layer.7": 843.8257, "encoder_q-layer.8": 841.3026, "encoder_q-layer.9": 639.9588, "epoch": 0.88, "inbatch_neg_score": 0.5961, "inbatch_pos_score": 1.2939, "learning_rate": 5.722222222222223e-06, "loss": 3.0601, "norm_diff": 0.0908, "norm_loss": 0.0, "num_token_doc": 66.6412, "num_token_overlap": 15.8494, "num_token_query": 42.3806, "num_token_union": 68.4452, "num_word_context": 202.4534, "num_word_doc": 49.757, "num_word_query": 32.014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1239.9199, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5957, "query_norm": 1.4672, "queue_k_norm": 1.5589, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3806, "sent_len_1": 66.6412, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.135, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.04, "doc_norm": 1.5569, "encoder_q-embeddings": 596.996, "encoder_q-layer.0": 407.2119, "encoder_q-layer.1": 438.8253, "encoder_q-layer.10": 609.2579, "encoder_q-layer.11": 1453.939, "encoder_q-layer.2": 510.0499, "encoder_q-layer.3": 507.4018, "encoder_q-layer.4": 517.83, "encoder_q-layer.5": 511.5889, "encoder_q-layer.6": 541.3693, "encoder_q-layer.7": 593.0508, "encoder_q-layer.8": 640.0905, "encoder_q-layer.9": 582.4908, "epoch": 0.88, "inbatch_neg_score": 0.5943, "inbatch_pos_score": 1.2695, "learning_rate": 5.666666666666667e-06, "loss": 3.04, "norm_diff": 0.0919, "norm_loss": 0.0, "num_token_doc": 66.7418, "num_token_overlap": 15.8311, "num_token_query": 42.4495, "num_token_union": 68.5457, "num_word_context": 202.4627, "num_word_doc": 49.8463, "num_word_query": 32.075, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1030.255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5938, "query_norm": 1.465, "queue_k_norm": 1.5599, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4495, "sent_len_1": 66.7418, "sent_len_max_0": 127.9938, "sent_len_max_1": 188.3862, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.0524, "doc_norm": 1.5623, "encoder_q-embeddings": 471.1099, "encoder_q-layer.0": 316.8812, "encoder_q-layer.1": 343.5526, "encoder_q-layer.10": 590.2726, "encoder_q-layer.11": 1393.9402, "encoder_q-layer.2": 389.9344, "encoder_q-layer.3": 406.5892, "encoder_q-layer.4": 454.7239, "encoder_q-layer.5": 448.3453, "encoder_q-layer.6": 506.744, "encoder_q-layer.7": 525.0115, "encoder_q-layer.8": 597.3072, "encoder_q-layer.9": 568.4721, "epoch": 0.88, "inbatch_neg_score": 0.5937, "inbatch_pos_score": 1.3066, "learning_rate": 5.611111111111112e-06, "loss": 3.0524, "norm_diff": 0.0903, "norm_loss": 0.0, "num_token_doc": 66.7926, "num_token_overlap": 15.7922, "num_token_query": 42.2374, "num_token_union": 68.4805, "num_word_context": 202.7158, "num_word_doc": 49.87, "num_word_query": 31.8966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 918.5244, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5942, "query_norm": 1.472, "queue_k_norm": 1.5611, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2374, "sent_len_1": 66.7926, "sent_len_max_0": 127.9912, "sent_len_max_1": 187.97, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0678, "doc_norm": 1.5542, "encoder_q-embeddings": 1450.6113, "encoder_q-layer.0": 1011.3538, "encoder_q-layer.1": 1109.8749, "encoder_q-layer.10": 652.4465, "encoder_q-layer.11": 1472.2356, "encoder_q-layer.2": 1324.682, "encoder_q-layer.3": 1438.5549, "encoder_q-layer.4": 1524.2792, "encoder_q-layer.5": 1683.6073, "encoder_q-layer.6": 1688.4963, "encoder_q-layer.7": 1497.2095, "encoder_q-layer.8": 1006.3546, "encoder_q-layer.9": 604.9812, "epoch": 0.88, "inbatch_neg_score": 0.5975, "inbatch_pos_score": 1.2861, "learning_rate": 5.555555555555556e-06, "loss": 3.0678, "norm_diff": 0.0872, "norm_loss": 0.0, "num_token_doc": 66.7082, "num_token_overlap": 15.7782, "num_token_query": 42.173, "num_token_union": 68.3511, "num_word_context": 201.842, "num_word_doc": 49.7876, "num_word_query": 31.8164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1992.7549, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5967, "query_norm": 1.467, "queue_k_norm": 1.56, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.173, "sent_len_1": 66.7082, "sent_len_max_0": 127.99, "sent_len_max_1": 189.235, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 27.2305, "dev_samples_per_second": 2.35, "dev_steps_per_second": 0.037, "epoch": 0.88, "step": 90000, "test_accuracy": 93.78662109375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3445219397544861, "test_doc_norm": 1.5468926429748535, "test_inbatch_neg_score": 0.9716705679893494, "test_inbatch_pos_score": 1.9343433380126953, "test_loss": 0.3445219397544861, "test_loss_align": 0.9851247668266296, "test_loss_unif": 3.300445556640625, "test_loss_unif_q@queue": 3.300445556640625, "test_norm_diff": 0.014680592343211174, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5957622528076172, "test_query_norm": 1.5592944622039795, "test_queue_k_norm": 1.5598721504211426, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042977962642908096, "test_stdq": 0.043030526489019394, "test_stdqueue_k": 0.048946529626846313, "test_stdqueue_q": 0.0 }, { "dev_runtime": 27.2305, "dev_samples_per_second": 2.35, "dev_steps_per_second": 0.037, "epoch": 0.88, "eval_beir-arguana_ndcg@10": 0.39196, "eval_beir-arguana_recall@10": 0.65861, "eval_beir-arguana_recall@100": 0.9367, "eval_beir-arguana_recall@20": 0.80085, "eval_beir-avg_ndcg@10": 0.3806385, "eval_beir-avg_recall@10": 0.45172141666666665, "eval_beir-avg_recall@100": 0.6345749166666667, "eval_beir-avg_recall@20": 0.51472625, "eval_beir-cqadupstack_ndcg@10": 0.26575499999999996, "eval_beir-cqadupstack_recall@10": 0.36003416666666666, "eval_beir-cqadupstack_recall@100": 0.5985591666666666, "eval_beir-cqadupstack_recall@20": 0.4302725, "eval_beir-fiqa_ndcg@10": 0.25409, "eval_beir-fiqa_recall@10": 0.31404, "eval_beir-fiqa_recall@100": 0.5908, "eval_beir-fiqa_recall@20": 0.38443, "eval_beir-nfcorpus_ndcg@10": 0.29314, "eval_beir-nfcorpus_recall@10": 0.14345, "eval_beir-nfcorpus_recall@100": 0.28289, "eval_beir-nfcorpus_recall@20": 0.17709, "eval_beir-nq_ndcg@10": 0.27751, "eval_beir-nq_recall@10": 0.45191, "eval_beir-nq_recall@100": 0.79748, "eval_beir-nq_recall@20": 0.5765, "eval_beir-quora_ndcg@10": 0.77606, "eval_beir-quora_recall@10": 0.88496, "eval_beir-quora_recall@100": 0.97539, "eval_beir-quora_recall@20": 0.92484, "eval_beir-scidocs_ndcg@10": 0.15468, "eval_beir-scidocs_recall@10": 0.16203, "eval_beir-scidocs_recall@100": 0.3739, "eval_beir-scidocs_recall@20": 0.22113, "eval_beir-scifact_ndcg@10": 0.65836, "eval_beir-scifact_recall@10": 0.80511, "eval_beir-scifact_recall@100": 0.90989, "eval_beir-scifact_recall@20": 0.84189, "eval_beir-trec-covid_ndcg@10": 0.55453, "eval_beir-trec-covid_recall@10": 0.608, "eval_beir-trec-covid_recall@100": 0.4628, "eval_beir-trec-covid_recall@20": 0.579, "eval_beir-webis-touche2020_ndcg@10": 0.1803, "eval_beir-webis-touche2020_recall@10": 0.12907, "eval_beir-webis-touche2020_recall@100": 0.41734, "eval_beir-webis-touche2020_recall@20": 0.21126, "eval_senteval-avg_sts": 0.7599547574246401, "eval_senteval-sickr_spearman": 0.7315454714916078, "eval_senteval-stsb_spearman": 0.7883640433576725, "step": 90000, "test_accuracy": 93.78662109375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3445219397544861, "test_doc_norm": 1.5468926429748535, "test_inbatch_neg_score": 0.9716705679893494, "test_inbatch_pos_score": 1.9343433380126953, "test_loss": 0.3445219397544861, "test_loss_align": 0.9851247668266296, "test_loss_unif": 3.300445556640625, "test_loss_unif_q@queue": 3.300445556640625, "test_norm_diff": 0.014680592343211174, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5957622528076172, "test_query_norm": 1.5592944622039795, "test_queue_k_norm": 1.5598721504211426, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042977962642908096, "test_stdq": 0.043030526489019394, "test_stdqueue_k": 0.048946529626846313, "test_stdqueue_q": 0.0 }, { "accuracy": 57.6172, "active_queue_size": 16384.0, "cl_loss": 3.035, "doc_norm": 1.5643, "encoder_q-embeddings": 860.6548, "encoder_q-layer.0": 577.3458, "encoder_q-layer.1": 713.9843, "encoder_q-layer.10": 571.4011, "encoder_q-layer.11": 1388.1219, "encoder_q-layer.2": 1003.2946, "encoder_q-layer.3": 1113.1462, "encoder_q-layer.4": 1117.1874, "encoder_q-layer.5": 1063.8876, "encoder_q-layer.6": 1105.1304, "encoder_q-layer.7": 951.2333, "encoder_q-layer.8": 882.6343, "encoder_q-layer.9": 650.2847, "epoch": 0.88, "inbatch_neg_score": 0.5979, "inbatch_pos_score": 1.2988, "learning_rate": 5.500000000000001e-06, "loss": 3.035, "norm_diff": 0.0931, "norm_loss": 0.0, "num_token_doc": 66.7301, "num_token_overlap": 15.8456, "num_token_query": 42.5002, "num_token_union": 68.5446, "num_word_context": 202.3118, "num_word_doc": 49.7576, "num_word_query": 32.11, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1450.7287, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5972, "query_norm": 1.4712, "queue_k_norm": 1.5597, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5002, "sent_len_1": 66.7301, "sent_len_max_0": 127.9825, "sent_len_max_1": 189.9387, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0412, "doc_norm": 1.5532, "encoder_q-embeddings": 545.416, "encoder_q-layer.0": 359.2222, "encoder_q-layer.1": 386.1259, "encoder_q-layer.10": 620.0435, "encoder_q-layer.11": 1443.8632, "encoder_q-layer.2": 440.2385, "encoder_q-layer.3": 467.1787, "encoder_q-layer.4": 508.7083, "encoder_q-layer.5": 517.5681, "encoder_q-layer.6": 546.4774, "encoder_q-layer.7": 587.667, "encoder_q-layer.8": 660.6267, "encoder_q-layer.9": 583.2739, "epoch": 0.88, "inbatch_neg_score": 0.595, "inbatch_pos_score": 1.2744, "learning_rate": 5.444444444444445e-06, "loss": 3.0412, "norm_diff": 0.0946, "norm_loss": 0.0, "num_token_doc": 66.8988, "num_token_overlap": 15.7693, "num_token_query": 42.2063, "num_token_union": 68.5016, "num_word_context": 202.4232, "num_word_doc": 49.8878, "num_word_query": 31.8586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 995.4288, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5947, "query_norm": 1.4586, "queue_k_norm": 1.5602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2063, "sent_len_1": 66.8988, "sent_len_max_0": 128.0, "sent_len_max_1": 193.0213, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.0223, "doc_norm": 1.5626, "encoder_q-embeddings": 635.6914, "encoder_q-layer.0": 434.2479, "encoder_q-layer.1": 471.3613, "encoder_q-layer.10": 674.8541, "encoder_q-layer.11": 1559.8684, "encoder_q-layer.2": 545.0701, "encoder_q-layer.3": 598.4411, "encoder_q-layer.4": 664.0721, "encoder_q-layer.5": 734.5852, "encoder_q-layer.6": 719.2542, "encoder_q-layer.7": 753.728, "encoder_q-layer.8": 716.8527, "encoder_q-layer.9": 657.2256, "epoch": 0.88, "inbatch_neg_score": 0.5964, "inbatch_pos_score": 1.2715, "learning_rate": 5.388888888888889e-06, "loss": 3.0223, "norm_diff": 0.0927, "norm_loss": 0.0, "num_token_doc": 67.0763, "num_token_overlap": 15.8925, "num_token_query": 42.3228, "num_token_union": 68.6076, "num_word_context": 202.3952, "num_word_doc": 50.0811, "num_word_query": 31.9392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1137.8726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5967, "query_norm": 1.47, "queue_k_norm": 1.5591, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3228, "sent_len_1": 67.0763, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5213, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0362, "doc_norm": 1.5538, "encoder_q-embeddings": 961.4163, "encoder_q-layer.0": 647.3458, "encoder_q-layer.1": 751.7687, "encoder_q-layer.10": 577.0732, "encoder_q-layer.11": 1475.6577, "encoder_q-layer.2": 870.7286, "encoder_q-layer.3": 890.7831, "encoder_q-layer.4": 939.8425, "encoder_q-layer.5": 961.0145, "encoder_q-layer.6": 902.486, "encoder_q-layer.7": 970.3096, "encoder_q-layer.8": 843.6943, "encoder_q-layer.9": 628.0467, "epoch": 0.88, "inbatch_neg_score": 0.595, "inbatch_pos_score": 1.2793, "learning_rate": 5.333333333333334e-06, "loss": 3.0362, "norm_diff": 0.0905, "norm_loss": 0.0, "num_token_doc": 66.8473, "num_token_overlap": 15.9038, "num_token_query": 42.3893, "num_token_union": 68.4783, "num_word_context": 202.055, "num_word_doc": 49.8921, "num_word_query": 32.0354, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1393.4571, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5947, "query_norm": 1.4633, "queue_k_norm": 1.5586, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3893, "sent_len_1": 66.8473, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.71, "stdk": 0.0486, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 58.2031, "active_queue_size": 16384.0, "cl_loss": 3.0257, "doc_norm": 1.561, "encoder_q-embeddings": 601.1149, "encoder_q-layer.0": 387.7325, "encoder_q-layer.1": 421.7141, "encoder_q-layer.10": 590.9186, "encoder_q-layer.11": 1442.5166, "encoder_q-layer.2": 479.0606, "encoder_q-layer.3": 535.2581, "encoder_q-layer.4": 584.9835, "encoder_q-layer.5": 588.6592, "encoder_q-layer.6": 627.9303, "encoder_q-layer.7": 646.8261, "encoder_q-layer.8": 668.5214, "encoder_q-layer.9": 579.2664, "epoch": 0.88, "inbatch_neg_score": 0.597, "inbatch_pos_score": 1.3027, "learning_rate": 5.277777777777778e-06, "loss": 3.0257, "norm_diff": 0.0896, "norm_loss": 0.0, "num_token_doc": 66.6073, "num_token_overlap": 15.8716, "num_token_query": 42.4302, "num_token_union": 68.4316, "num_word_context": 202.142, "num_word_doc": 49.7068, "num_word_query": 32.0394, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1026.3716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5967, "query_norm": 1.4714, "queue_k_norm": 1.5607, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4302, "sent_len_1": 66.6073, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.9588, "stdk": 0.0489, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0462, "doc_norm": 1.5567, "encoder_q-embeddings": 894.1498, "encoder_q-layer.0": 653.2352, "encoder_q-layer.1": 680.2892, "encoder_q-layer.10": 643.9409, "encoder_q-layer.11": 1459.2169, "encoder_q-layer.2": 817.7094, "encoder_q-layer.3": 871.9091, "encoder_q-layer.4": 814.0875, "encoder_q-layer.5": 805.3492, "encoder_q-layer.6": 785.7651, "encoder_q-layer.7": 858.6548, "encoder_q-layer.8": 1067.5264, "encoder_q-layer.9": 749.498, "epoch": 0.88, "inbatch_neg_score": 0.5987, "inbatch_pos_score": 1.2871, "learning_rate": 5.2222222222222226e-06, "loss": 3.0462, "norm_diff": 0.0881, "norm_loss": 0.0, "num_token_doc": 66.6166, "num_token_overlap": 15.7834, "num_token_query": 42.2772, "num_token_union": 68.3129, "num_word_context": 201.8384, "num_word_doc": 49.6838, "num_word_query": 31.9124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1361.518, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5986, "query_norm": 1.4686, "queue_k_norm": 1.5592, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2772, "sent_len_1": 66.6166, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2475, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 53.5156, "active_queue_size": 16384.0, "cl_loss": 3.0433, "doc_norm": 1.5559, "encoder_q-embeddings": 606.1743, "encoder_q-layer.0": 412.3952, "encoder_q-layer.1": 466.3452, "encoder_q-layer.10": 638.9564, "encoder_q-layer.11": 1561.49, "encoder_q-layer.2": 581.4105, "encoder_q-layer.3": 618.4052, "encoder_q-layer.4": 654.7516, "encoder_q-layer.5": 721.2679, "encoder_q-layer.6": 729.6166, "encoder_q-layer.7": 762.3155, "encoder_q-layer.8": 723.9144, "encoder_q-layer.9": 610.3309, "epoch": 0.89, "inbatch_neg_score": 0.5967, "inbatch_pos_score": 1.2793, "learning_rate": 5.166666666666667e-06, "loss": 3.0433, "norm_diff": 0.0892, "norm_loss": 0.0, "num_token_doc": 66.7748, "num_token_overlap": 15.9218, "num_token_query": 42.5387, "num_token_union": 68.5141, "num_word_context": 202.343, "num_word_doc": 49.8411, "num_word_query": 32.1343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1132.4712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5947, "query_norm": 1.4667, "queue_k_norm": 1.5594, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5387, "sent_len_1": 66.7748, "sent_len_max_0": 127.9688, "sent_len_max_1": 187.7587, "stdk": 0.0486, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 53.125, "active_queue_size": 16384.0, "cl_loss": 3.048, "doc_norm": 1.5525, "encoder_q-embeddings": 1195.3721, "encoder_q-layer.0": 842.1644, "encoder_q-layer.1": 939.6027, "encoder_q-layer.10": 663.9633, "encoder_q-layer.11": 1513.7712, "encoder_q-layer.2": 1080.8929, "encoder_q-layer.3": 1297.9221, "encoder_q-layer.4": 1368.7592, "encoder_q-layer.5": 1458.179, "encoder_q-layer.6": 1507.2335, "encoder_q-layer.7": 1277.4414, "encoder_q-layer.8": 797.0027, "encoder_q-layer.9": 611.0919, "epoch": 0.89, "inbatch_neg_score": 0.5938, "inbatch_pos_score": 1.2656, "learning_rate": 5.1111111111111115e-06, "loss": 3.048, "norm_diff": 0.0939, "norm_loss": 0.0, "num_token_doc": 66.7415, "num_token_overlap": 15.7844, "num_token_query": 42.3296, "num_token_union": 68.4886, "num_word_context": 202.4602, "num_word_doc": 49.8, "num_word_query": 31.9528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1774.1032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5942, "query_norm": 1.4586, "queue_k_norm": 1.5595, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3296, "sent_len_1": 66.7415, "sent_len_max_0": 127.96, "sent_len_max_1": 189.3988, "stdk": 0.0485, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 3.0484, "doc_norm": 1.5611, "encoder_q-embeddings": 912.0118, "encoder_q-layer.0": 652.5501, "encoder_q-layer.1": 758.3031, "encoder_q-layer.10": 578.1235, "encoder_q-layer.11": 1380.0125, "encoder_q-layer.2": 879.8953, "encoder_q-layer.3": 881.2568, "encoder_q-layer.4": 964.7887, "encoder_q-layer.5": 1066.8934, "encoder_q-layer.6": 1158.4229, "encoder_q-layer.7": 1381.4723, "encoder_q-layer.8": 1144.3126, "encoder_q-layer.9": 741.9549, "epoch": 0.89, "inbatch_neg_score": 0.5962, "inbatch_pos_score": 1.3047, "learning_rate": 5.0555555555555555e-06, "loss": 3.0484, "norm_diff": 0.0907, "norm_loss": 0.0, "num_token_doc": 67.0798, "num_token_overlap": 15.8242, "num_token_query": 42.1557, "num_token_union": 68.563, "num_word_context": 202.4225, "num_word_doc": 50.0223, "num_word_query": 31.8066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1485.5533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5972, "query_norm": 1.4704, "queue_k_norm": 1.561, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1557, "sent_len_1": 67.0798, "sent_len_max_0": 127.9875, "sent_len_max_1": 191.9688, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 51.6602, "active_queue_size": 16384.0, "cl_loss": 3.0439, "doc_norm": 1.5628, "encoder_q-embeddings": 1195.89, "encoder_q-layer.0": 824.0717, "encoder_q-layer.1": 895.3164, "encoder_q-layer.10": 1212.5537, "encoder_q-layer.11": 2967.7983, "encoder_q-layer.2": 1033.5712, "encoder_q-layer.3": 1078.9121, "encoder_q-layer.4": 1205.4819, "encoder_q-layer.5": 1118.0759, "encoder_q-layer.6": 1177.4524, "encoder_q-layer.7": 1265.0601, "encoder_q-layer.8": 1354.6222, "encoder_q-layer.9": 1150.1, "epoch": 0.89, "inbatch_neg_score": 0.5992, "inbatch_pos_score": 1.2852, "learning_rate": 5e-06, "loss": 3.0439, "norm_diff": 0.1012, "norm_loss": 0.0, "num_token_doc": 66.8259, "num_token_overlap": 15.8475, "num_token_query": 42.3568, "num_token_union": 68.5404, "num_word_context": 202.6209, "num_word_doc": 49.9085, "num_word_query": 31.9945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2107.5108, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5986, "query_norm": 1.4616, "queue_k_norm": 1.5611, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3568, "sent_len_1": 66.8259, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.1037, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.0329, "doc_norm": 1.5602, "encoder_q-embeddings": 1027.0557, "encoder_q-layer.0": 685.4642, "encoder_q-layer.1": 746.2553, "encoder_q-layer.10": 1161.3802, "encoder_q-layer.11": 2962.2959, "encoder_q-layer.2": 848.3674, "encoder_q-layer.3": 858.452, "encoder_q-layer.4": 907.5794, "encoder_q-layer.5": 923.0897, "encoder_q-layer.6": 961.0043, "encoder_q-layer.7": 1077.5491, "encoder_q-layer.8": 1242.428, "encoder_q-layer.9": 1137.5343, "epoch": 0.89, "inbatch_neg_score": 0.5926, "inbatch_pos_score": 1.2979, "learning_rate": 4.9444444444444444e-06, "loss": 3.0329, "norm_diff": 0.0967, "norm_loss": 0.0, "num_token_doc": 66.9868, "num_token_overlap": 15.8257, "num_token_query": 42.3149, "num_token_union": 68.6039, "num_word_context": 202.5729, "num_word_doc": 50.013, "num_word_query": 31.9578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1956.3032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5933, "query_norm": 1.4635, "queue_k_norm": 1.5617, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3149, "sent_len_1": 66.9868, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.1475, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0536, "doc_norm": 1.5621, "encoder_q-embeddings": 4114.645, "encoder_q-layer.0": 2920.6729, "encoder_q-layer.1": 3387.2041, "encoder_q-layer.10": 1368.7571, "encoder_q-layer.11": 3046.918, "encoder_q-layer.2": 3767.0444, "encoder_q-layer.3": 3922.9866, "encoder_q-layer.4": 4282.0552, "encoder_q-layer.5": 4236.4517, "encoder_q-layer.6": 4207.7578, "encoder_q-layer.7": 4048.7729, "encoder_q-layer.8": 3139.5249, "encoder_q-layer.9": 1432.2596, "epoch": 0.89, "inbatch_neg_score": 0.5978, "inbatch_pos_score": 1.2988, "learning_rate": 4.888888888888889e-06, "loss": 3.0536, "norm_diff": 0.1002, "norm_loss": 0.0, "num_token_doc": 66.7173, "num_token_overlap": 15.8626, "num_token_query": 42.4196, "num_token_union": 68.4668, "num_word_context": 202.1957, "num_word_doc": 49.7797, "num_word_query": 32.0594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5313.3344, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5977, "query_norm": 1.4619, "queue_k_norm": 1.5598, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4196, "sent_len_1": 66.7173, "sent_len_max_0": 127.9963, "sent_len_max_1": 191.0838, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0477, "doc_norm": 1.5653, "encoder_q-embeddings": 1087.2538, "encoder_q-layer.0": 730.3408, "encoder_q-layer.1": 815.4628, "encoder_q-layer.10": 1214.2186, "encoder_q-layer.11": 2898.0288, "encoder_q-layer.2": 932.1066, "encoder_q-layer.3": 980.0404, "encoder_q-layer.4": 1082.9813, "encoder_q-layer.5": 1124.1268, "encoder_q-layer.6": 1158.0536, "encoder_q-layer.7": 1190.1061, "encoder_q-layer.8": 1305.5265, "encoder_q-layer.9": 1157.0099, "epoch": 0.89, "inbatch_neg_score": 0.5993, "inbatch_pos_score": 1.292, "learning_rate": 4.833333333333333e-06, "loss": 3.0477, "norm_diff": 0.0913, "norm_loss": 0.0, "num_token_doc": 66.9493, "num_token_overlap": 15.8317, "num_token_query": 42.4733, "num_token_union": 68.6277, "num_word_context": 202.5993, "num_word_doc": 49.9232, "num_word_query": 32.1088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2023.1984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5986, "query_norm": 1.4739, "queue_k_norm": 1.5598, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4733, "sent_len_1": 66.9493, "sent_len_max_0": 127.99, "sent_len_max_1": 188.8413, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0395, "doc_norm": 1.5568, "encoder_q-embeddings": 1127.4011, "encoder_q-layer.0": 791.2426, "encoder_q-layer.1": 810.3832, "encoder_q-layer.10": 1265.4176, "encoder_q-layer.11": 2999.3652, "encoder_q-layer.2": 926.0449, "encoder_q-layer.3": 888.3083, "encoder_q-layer.4": 917.0914, "encoder_q-layer.5": 963.7228, "encoder_q-layer.6": 1084.3984, "encoder_q-layer.7": 1142.0765, "encoder_q-layer.8": 1288.24, "encoder_q-layer.9": 1212.5402, "epoch": 0.89, "inbatch_neg_score": 0.5964, "inbatch_pos_score": 1.2979, "learning_rate": 4.777777777777778e-06, "loss": 3.0395, "norm_diff": 0.0777, "norm_loss": 0.0, "num_token_doc": 66.6946, "num_token_overlap": 15.824, "num_token_query": 42.3957, "num_token_union": 68.4704, "num_word_context": 202.2354, "num_word_doc": 49.7647, "num_word_query": 32.0127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2008.2034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5962, "query_norm": 1.4791, "queue_k_norm": 1.5604, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3957, "sent_len_1": 66.6946, "sent_len_max_0": 127.985, "sent_len_max_1": 190.2025, "stdk": 0.0487, "stdq": 0.0456, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0458, "doc_norm": 1.5608, "encoder_q-embeddings": 1185.3805, "encoder_q-layer.0": 819.4746, "encoder_q-layer.1": 922.9503, "encoder_q-layer.10": 1252.5934, "encoder_q-layer.11": 2921.0427, "encoder_q-layer.2": 1054.6039, "encoder_q-layer.3": 1074.8778, "encoder_q-layer.4": 1100.1162, "encoder_q-layer.5": 1095.326, "encoder_q-layer.6": 1222.2938, "encoder_q-layer.7": 1408.6581, "encoder_q-layer.8": 1488.4867, "encoder_q-layer.9": 1211.2416, "epoch": 0.89, "inbatch_neg_score": 0.5999, "inbatch_pos_score": 1.2764, "learning_rate": 4.722222222222222e-06, "loss": 3.0458, "norm_diff": 0.091, "norm_loss": 0.0, "num_token_doc": 66.8131, "num_token_overlap": 15.8756, "num_token_query": 42.5587, "num_token_union": 68.5968, "num_word_context": 202.4994, "num_word_doc": 49.835, "num_word_query": 32.1584, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2125.0415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5996, "query_norm": 1.4698, "queue_k_norm": 1.5612, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.5587, "sent_len_1": 66.8131, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4913, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0353, "doc_norm": 1.5657, "encoder_q-embeddings": 1131.8098, "encoder_q-layer.0": 760.8796, "encoder_q-layer.1": 831.0145, "encoder_q-layer.10": 1250.8945, "encoder_q-layer.11": 2856.5364, "encoder_q-layer.2": 967.0752, "encoder_q-layer.3": 1029.3459, "encoder_q-layer.4": 1011.0475, "encoder_q-layer.5": 991.3313, "encoder_q-layer.6": 1096.8359, "encoder_q-layer.7": 1160.272, "encoder_q-layer.8": 1271.9495, "encoder_q-layer.9": 1134.5571, "epoch": 0.89, "inbatch_neg_score": 0.5969, "inbatch_pos_score": 1.292, "learning_rate": 4.666666666666667e-06, "loss": 3.0353, "norm_diff": 0.0971, "norm_loss": 0.0, "num_token_doc": 66.9827, "num_token_overlap": 15.9019, "num_token_query": 42.5646, "num_token_union": 68.6548, "num_word_context": 202.442, "num_word_doc": 49.9824, "num_word_query": 32.1656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1975.2103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5967, "query_norm": 1.4686, "queue_k_norm": 1.5612, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5646, "sent_len_1": 66.9827, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0588, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.0403, "doc_norm": 1.5602, "encoder_q-embeddings": 1229.0089, "encoder_q-layer.0": 858.8877, "encoder_q-layer.1": 951.6466, "encoder_q-layer.10": 1446.2959, "encoder_q-layer.11": 3045.1819, "encoder_q-layer.2": 1045.4268, "encoder_q-layer.3": 1098.4095, "encoder_q-layer.4": 1128.403, "encoder_q-layer.5": 1135.053, "encoder_q-layer.6": 1233.736, "encoder_q-layer.7": 1229.1987, "encoder_q-layer.8": 1339.8663, "encoder_q-layer.9": 1200.7938, "epoch": 0.9, "inbatch_neg_score": 0.6002, "inbatch_pos_score": 1.2832, "learning_rate": 4.611111111111111e-06, "loss": 3.0403, "norm_diff": 0.0981, "norm_loss": 0.0, "num_token_doc": 66.8641, "num_token_overlap": 15.7795, "num_token_query": 42.2711, "num_token_union": 68.5784, "num_word_context": 202.3428, "num_word_doc": 49.8956, "num_word_query": 31.9467, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2139.5041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5986, "query_norm": 1.4621, "queue_k_norm": 1.5603, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2711, "sent_len_1": 66.8641, "sent_len_max_0": 128.0, "sent_len_max_1": 190.3663, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 58.6914, "active_queue_size": 16384.0, "cl_loss": 3.0319, "doc_norm": 1.5627, "encoder_q-embeddings": 4241.5127, "encoder_q-layer.0": 2811.8813, "encoder_q-layer.1": 3397.4065, "encoder_q-layer.10": 1237.4707, "encoder_q-layer.11": 2784.2874, "encoder_q-layer.2": 3990.498, "encoder_q-layer.3": 3679.8997, "encoder_q-layer.4": 4171.8032, "encoder_q-layer.5": 4384.9277, "encoder_q-layer.6": 3763.3416, "encoder_q-layer.7": 2931.9482, "encoder_q-layer.8": 2798.2332, "encoder_q-layer.9": 1769.0754, "epoch": 0.9, "inbatch_neg_score": 0.6003, "inbatch_pos_score": 1.3047, "learning_rate": 4.555555555555556e-06, "loss": 3.0319, "norm_diff": 0.0981, "norm_loss": 0.0, "num_token_doc": 66.8381, "num_token_overlap": 15.8473, "num_token_query": 42.377, "num_token_union": 68.5225, "num_word_context": 202.2976, "num_word_doc": 49.8793, "num_word_query": 32.0264, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5074.5515, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6001, "query_norm": 1.4647, "queue_k_norm": 1.5603, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.377, "sent_len_1": 66.8381, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.7375, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.0525, "doc_norm": 1.5602, "encoder_q-embeddings": 1021.398, "encoder_q-layer.0": 711.5132, "encoder_q-layer.1": 769.197, "encoder_q-layer.10": 1150.5557, "encoder_q-layer.11": 2891.9448, "encoder_q-layer.2": 901.7368, "encoder_q-layer.3": 959.2594, "encoder_q-layer.4": 1000.4718, "encoder_q-layer.5": 981.5059, "encoder_q-layer.6": 1066.2451, "encoder_q-layer.7": 1101.9324, "encoder_q-layer.8": 1254.8801, "encoder_q-layer.9": 1139.7227, "epoch": 0.9, "inbatch_neg_score": 0.6, "inbatch_pos_score": 1.2852, "learning_rate": 4.5e-06, "loss": 3.0525, "norm_diff": 0.1043, "norm_loss": 0.0, "num_token_doc": 66.614, "num_token_overlap": 15.7862, "num_token_query": 42.2497, "num_token_union": 68.341, "num_word_context": 202.0528, "num_word_doc": 49.7027, "num_word_query": 31.9279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1973.8986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6001, "query_norm": 1.456, "queue_k_norm": 1.56, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2497, "sent_len_1": 66.614, "sent_len_max_0": 127.995, "sent_len_max_1": 191.41, "stdk": 0.0488, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0451, "doc_norm": 1.5647, "encoder_q-embeddings": 976.7679, "encoder_q-layer.0": 650.7051, "encoder_q-layer.1": 704.8102, "encoder_q-layer.10": 1211.5416, "encoder_q-layer.11": 2946.311, "encoder_q-layer.2": 786.0776, "encoder_q-layer.3": 834.4379, "encoder_q-layer.4": 901.8083, "encoder_q-layer.5": 912.2305, "encoder_q-layer.6": 1014.1149, "encoder_q-layer.7": 1195.4182, "encoder_q-layer.8": 1234.6324, "encoder_q-layer.9": 1169.2979, "epoch": 0.9, "inbatch_neg_score": 0.6013, "inbatch_pos_score": 1.3086, "learning_rate": 4.444444444444445e-06, "loss": 3.0451, "norm_diff": 0.09, "norm_loss": 0.0, "num_token_doc": 66.8057, "num_token_overlap": 15.8071, "num_token_query": 42.2131, "num_token_union": 68.4031, "num_word_context": 202.087, "num_word_doc": 49.8146, "num_word_query": 31.8707, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1941.3862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6011, "query_norm": 1.4747, "queue_k_norm": 1.5591, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2131, "sent_len_1": 66.8057, "sent_len_max_0": 127.9613, "sent_len_max_1": 192.52, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.0338, "doc_norm": 1.5631, "encoder_q-embeddings": 1084.8491, "encoder_q-layer.0": 698.3002, "encoder_q-layer.1": 753.221, "encoder_q-layer.10": 1259.6775, "encoder_q-layer.11": 3053.9993, "encoder_q-layer.2": 840.3922, "encoder_q-layer.3": 897.764, "encoder_q-layer.4": 982.5866, "encoder_q-layer.5": 969.917, "encoder_q-layer.6": 1081.5985, "encoder_q-layer.7": 1222.5305, "encoder_q-layer.8": 1278.4578, "encoder_q-layer.9": 1207.9384, "epoch": 0.9, "inbatch_neg_score": 0.5967, "inbatch_pos_score": 1.2744, "learning_rate": 4.388888888888889e-06, "loss": 3.0338, "norm_diff": 0.091, "norm_loss": 0.0, "num_token_doc": 66.7795, "num_token_overlap": 15.9082, "num_token_query": 42.5258, "num_token_union": 68.5399, "num_word_context": 202.2379, "num_word_doc": 49.8112, "num_word_query": 32.1353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2026.3051, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5981, "query_norm": 1.472, "queue_k_norm": 1.5603, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.5258, "sent_len_1": 66.7795, "sent_len_max_0": 128.0, "sent_len_max_1": 188.77, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.0535, "doc_norm": 1.5562, "encoder_q-embeddings": 1085.3187, "encoder_q-layer.0": 713.2649, "encoder_q-layer.1": 795.7662, "encoder_q-layer.10": 1205.2867, "encoder_q-layer.11": 2974.1941, "encoder_q-layer.2": 925.6906, "encoder_q-layer.3": 916.9619, "encoder_q-layer.4": 987.5479, "encoder_q-layer.5": 1044.2335, "encoder_q-layer.6": 1135.4286, "encoder_q-layer.7": 1214.4183, "encoder_q-layer.8": 1349.7452, "encoder_q-layer.9": 1171.686, "epoch": 0.9, "inbatch_neg_score": 0.601, "inbatch_pos_score": 1.291, "learning_rate": 4.333333333333334e-06, "loss": 3.0535, "norm_diff": 0.0844, "norm_loss": 0.0, "num_token_doc": 66.7679, "num_token_overlap": 15.7723, "num_token_query": 42.1172, "num_token_union": 68.4204, "num_word_context": 201.8944, "num_word_doc": 49.7859, "num_word_query": 31.8075, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2018.4511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6001, "query_norm": 1.4718, "queue_k_norm": 1.5605, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1172, "sent_len_1": 66.7679, "sent_len_max_0": 128.0, "sent_len_max_1": 189.5337, "stdk": 0.0486, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0451, "doc_norm": 1.5577, "encoder_q-embeddings": 1507.1265, "encoder_q-layer.0": 1068.3228, "encoder_q-layer.1": 1225.0128, "encoder_q-layer.10": 1256.9382, "encoder_q-layer.11": 3065.4351, "encoder_q-layer.2": 1358.369, "encoder_q-layer.3": 1421.3252, "encoder_q-layer.4": 1484.7052, "encoder_q-layer.5": 1472.6182, "encoder_q-layer.6": 1483.2281, "encoder_q-layer.7": 1493.9691, "encoder_q-layer.8": 1469.1752, "encoder_q-layer.9": 1247.0568, "epoch": 0.9, "inbatch_neg_score": 0.5978, "inbatch_pos_score": 1.2842, "learning_rate": 4.277777777777778e-06, "loss": 3.0451, "norm_diff": 0.0899, "norm_loss": 0.0, "num_token_doc": 66.6629, "num_token_overlap": 15.8396, "num_token_query": 42.3, "num_token_union": 68.3765, "num_word_context": 202.1729, "num_word_doc": 49.7216, "num_word_query": 31.9667, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2422.5808, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5991, "query_norm": 1.4678, "queue_k_norm": 1.5602, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3, "sent_len_1": 66.6629, "sent_len_max_0": 128.0, "sent_len_max_1": 189.2775, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0495, "doc_norm": 1.5567, "encoder_q-embeddings": 1174.7539, "encoder_q-layer.0": 802.8324, "encoder_q-layer.1": 905.0811, "encoder_q-layer.10": 1182.2634, "encoder_q-layer.11": 2872.0962, "encoder_q-layer.2": 1041.8965, "encoder_q-layer.3": 1139.3835, "encoder_q-layer.4": 1238.8719, "encoder_q-layer.5": 1293.4336, "encoder_q-layer.6": 1359.2694, "encoder_q-layer.7": 1348.9799, "encoder_q-layer.8": 1363.0253, "encoder_q-layer.9": 1151.6227, "epoch": 0.9, "inbatch_neg_score": 0.6019, "inbatch_pos_score": 1.2832, "learning_rate": 4.222222222222223e-06, "loss": 3.0495, "norm_diff": 0.0912, "norm_loss": 0.0, "num_token_doc": 66.7822, "num_token_overlap": 15.8498, "num_token_query": 42.3354, "num_token_union": 68.4494, "num_word_context": 202.4177, "num_word_doc": 49.8232, "num_word_query": 31.9736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2105.988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6006, "query_norm": 1.4655, "queue_k_norm": 1.5608, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3354, "sent_len_1": 66.7822, "sent_len_max_0": 127.995, "sent_len_max_1": 189.1337, "stdk": 0.0486, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 54.4922, "active_queue_size": 16384.0, "cl_loss": 3.0564, "doc_norm": 1.5551, "encoder_q-embeddings": 1025.4229, "encoder_q-layer.0": 672.6296, "encoder_q-layer.1": 709.618, "encoder_q-layer.10": 1155.4395, "encoder_q-layer.11": 2889.1819, "encoder_q-layer.2": 808.8903, "encoder_q-layer.3": 856.5795, "encoder_q-layer.4": 875.9104, "encoder_q-layer.5": 938.2614, "encoder_q-layer.6": 1044.8759, "encoder_q-layer.7": 1117.2723, "encoder_q-layer.8": 1300.7538, "encoder_q-layer.9": 1147.0769, "epoch": 0.9, "inbatch_neg_score": 0.6028, "inbatch_pos_score": 1.2754, "learning_rate": 4.166666666666667e-06, "loss": 3.0564, "norm_diff": 0.085, "norm_loss": 0.0, "num_token_doc": 66.5174, "num_token_overlap": 15.8178, "num_token_query": 42.316, "num_token_union": 68.3524, "num_word_context": 201.9686, "num_word_doc": 49.6842, "num_word_query": 31.9659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1930.9773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.603, "query_norm": 1.4701, "queue_k_norm": 1.5587, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.316, "sent_len_1": 66.5174, "sent_len_max_0": 128.0, "sent_len_max_1": 187.495, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 58.3984, "active_queue_size": 16384.0, "cl_loss": 3.0479, "doc_norm": 1.5637, "encoder_q-embeddings": 1607.0338, "encoder_q-layer.0": 1099.6172, "encoder_q-layer.1": 1190.2238, "encoder_q-layer.10": 1199.2056, "encoder_q-layer.11": 2959.9011, "encoder_q-layer.2": 1367.6528, "encoder_q-layer.3": 1411.1646, "encoder_q-layer.4": 1548.6254, "encoder_q-layer.5": 1605.7274, "encoder_q-layer.6": 1692.8953, "encoder_q-layer.7": 1685.3177, "encoder_q-layer.8": 1595.051, "encoder_q-layer.9": 1169.9958, "epoch": 0.9, "inbatch_neg_score": 0.5986, "inbatch_pos_score": 1.3125, "learning_rate": 4.111111111111112e-06, "loss": 3.0479, "norm_diff": 0.0949, "norm_loss": 0.0, "num_token_doc": 66.7654, "num_token_overlap": 15.7678, "num_token_query": 42.1983, "num_token_union": 68.4516, "num_word_context": 201.9717, "num_word_doc": 49.8099, "num_word_query": 31.873, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2464.2045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5996, "query_norm": 1.4688, "queue_k_norm": 1.5602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1983, "sent_len_1": 66.7654, "sent_len_max_0": 127.9925, "sent_len_max_1": 193.1025, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 52.9297, "active_queue_size": 16384.0, "cl_loss": 3.0184, "doc_norm": 1.5637, "encoder_q-embeddings": 1132.7524, "encoder_q-layer.0": 794.745, "encoder_q-layer.1": 869.5499, "encoder_q-layer.10": 1407.4703, "encoder_q-layer.11": 3176.2009, "encoder_q-layer.2": 1010.0621, "encoder_q-layer.3": 1064.4312, "encoder_q-layer.4": 1122.7389, "encoder_q-layer.5": 1137.5011, "encoder_q-layer.6": 1181.0447, "encoder_q-layer.7": 1177.3948, "encoder_q-layer.8": 1375.9094, "encoder_q-layer.9": 1238.9781, "epoch": 0.91, "inbatch_neg_score": 0.6002, "inbatch_pos_score": 1.2627, "learning_rate": 4.055555555555556e-06, "loss": 3.0184, "norm_diff": 0.0972, "norm_loss": 0.0, "num_token_doc": 66.7004, "num_token_overlap": 15.8917, "num_token_query": 42.4196, "num_token_union": 68.4723, "num_word_context": 202.0248, "num_word_doc": 49.7506, "num_word_query": 32.0284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2151.1865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6016, "query_norm": 1.4665, "queue_k_norm": 1.5607, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4196, "sent_len_1": 66.7004, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3162, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0202, "doc_norm": 1.5605, "encoder_q-embeddings": 997.4009, "encoder_q-layer.0": 645.9088, "encoder_q-layer.1": 664.6956, "encoder_q-layer.10": 1298.996, "encoder_q-layer.11": 3038.3853, "encoder_q-layer.2": 756.7571, "encoder_q-layer.3": 796.4191, "encoder_q-layer.4": 822.3921, "encoder_q-layer.5": 855.5212, "encoder_q-layer.6": 1012.6844, "encoder_q-layer.7": 1256.0281, "encoder_q-layer.8": 1346.1709, "encoder_q-layer.9": 1240.6357, "epoch": 0.91, "inbatch_neg_score": 0.6005, "inbatch_pos_score": 1.2891, "learning_rate": 4.000000000000001e-06, "loss": 3.0202, "norm_diff": 0.0985, "norm_loss": 0.0, "num_token_doc": 66.862, "num_token_overlap": 15.9239, "num_token_query": 42.5755, "num_token_union": 68.581, "num_word_context": 202.3513, "num_word_doc": 49.8899, "num_word_query": 32.1658, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2018.5456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6011, "query_norm": 1.462, "queue_k_norm": 1.5624, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5755, "sent_len_1": 66.862, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0312, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 54.0039, "active_queue_size": 16384.0, "cl_loss": 3.0385, "doc_norm": 1.5645, "encoder_q-embeddings": 2684.7544, "encoder_q-layer.0": 1893.8546, "encoder_q-layer.1": 2020.375, "encoder_q-layer.10": 1317.1501, "encoder_q-layer.11": 3260.6055, "encoder_q-layer.2": 2544.5605, "encoder_q-layer.3": 2577.0127, "encoder_q-layer.4": 2927.2539, "encoder_q-layer.5": 3003.2998, "encoder_q-layer.6": 3140.4495, "encoder_q-layer.7": 2835.4619, "encoder_q-layer.8": 2505.2583, "encoder_q-layer.9": 1640.1265, "epoch": 0.91, "inbatch_neg_score": 0.6009, "inbatch_pos_score": 1.29, "learning_rate": 3.944444444444445e-06, "loss": 3.0385, "norm_diff": 0.0928, "norm_loss": 0.0, "num_token_doc": 66.9947, "num_token_overlap": 15.8007, "num_token_query": 42.1201, "num_token_union": 68.5371, "num_word_context": 202.2639, "num_word_doc": 49.9773, "num_word_query": 31.8081, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3836.8274, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6006, "query_norm": 1.4717, "queue_k_norm": 1.563, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1201, "sent_len_1": 66.9947, "sent_len_max_0": 128.0, "sent_len_max_1": 191.3688, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 57.8125, "active_queue_size": 16384.0, "cl_loss": 3.0264, "doc_norm": 1.5629, "encoder_q-embeddings": 3204.1841, "encoder_q-layer.0": 2233.0344, "encoder_q-layer.1": 2542.4258, "encoder_q-layer.10": 2817.3745, "encoder_q-layer.11": 6070.4619, "encoder_q-layer.2": 3227.8462, "encoder_q-layer.3": 3353.9343, "encoder_q-layer.4": 3823.1079, "encoder_q-layer.5": 3532.666, "encoder_q-layer.6": 4096.9375, "encoder_q-layer.7": 4140.4062, "encoder_q-layer.8": 4085.8484, "encoder_q-layer.9": 3016.7886, "epoch": 0.91, "inbatch_neg_score": 0.5996, "inbatch_pos_score": 1.3037, "learning_rate": 3.888888888888889e-06, "loss": 3.0264, "norm_diff": 0.0855, "norm_loss": 0.0, "num_token_doc": 66.5509, "num_token_overlap": 15.7792, "num_token_query": 42.2972, "num_token_union": 68.3482, "num_word_context": 202.2251, "num_word_doc": 49.6654, "num_word_query": 31.963, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5478.37, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6006, "query_norm": 1.4773, "queue_k_norm": 1.5609, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2972, "sent_len_1": 66.5509, "sent_len_max_0": 128.0, "sent_len_max_1": 189.05, "stdk": 0.0489, "stdq": 0.0455, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 53.3203, "active_queue_size": 16384.0, "cl_loss": 3.0313, "doc_norm": 1.5595, "encoder_q-embeddings": 1047.4271, "encoder_q-layer.0": 679.7211, "encoder_q-layer.1": 743.0739, "encoder_q-layer.10": 1377.1776, "encoder_q-layer.11": 3055.9016, "encoder_q-layer.2": 821.4799, "encoder_q-layer.3": 862.3112, "encoder_q-layer.4": 898.4453, "encoder_q-layer.5": 933.8441, "encoder_q-layer.6": 1054.1608, "encoder_q-layer.7": 1169.5081, "encoder_q-layer.8": 1301.6162, "encoder_q-layer.9": 1169.9402, "epoch": 0.91, "inbatch_neg_score": 0.6001, "inbatch_pos_score": 1.2715, "learning_rate": 3.833333333333334e-06, "loss": 3.0313, "norm_diff": 0.0946, "norm_loss": 0.0, "num_token_doc": 66.6731, "num_token_overlap": 15.83, "num_token_query": 42.3871, "num_token_union": 68.4605, "num_word_context": 202.1903, "num_word_doc": 49.782, "num_word_query": 32.0216, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1990.5381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5996, "query_norm": 1.4649, "queue_k_norm": 1.5625, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3871, "sent_len_1": 66.6731, "sent_len_max_0": 128.0, "sent_len_max_1": 188.1025, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0554, "doc_norm": 1.558, "encoder_q-embeddings": 1188.5771, "encoder_q-layer.0": 773.6055, "encoder_q-layer.1": 848.11, "encoder_q-layer.10": 1150.6262, "encoder_q-layer.11": 2977.5132, "encoder_q-layer.2": 975.3871, "encoder_q-layer.3": 1015.6024, "encoder_q-layer.4": 1080.543, "encoder_q-layer.5": 1125.8177, "encoder_q-layer.6": 1194.8438, "encoder_q-layer.7": 1200.5591, "encoder_q-layer.8": 1335.3171, "encoder_q-layer.9": 1167.4686, "epoch": 0.91, "inbatch_neg_score": 0.6019, "inbatch_pos_score": 1.3125, "learning_rate": 3.777777777777778e-06, "loss": 3.0554, "norm_diff": 0.0893, "norm_loss": 0.0, "num_token_doc": 66.8706, "num_token_overlap": 15.7845, "num_token_query": 42.1951, "num_token_union": 68.4816, "num_word_context": 202.105, "num_word_doc": 49.8826, "num_word_query": 31.858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2085.1082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6011, "query_norm": 1.4687, "queue_k_norm": 1.5638, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1951, "sent_len_1": 66.8706, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4863, "stdk": 0.0487, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 57.1289, "active_queue_size": 16384.0, "cl_loss": 3.0187, "doc_norm": 1.5616, "encoder_q-embeddings": 972.0052, "encoder_q-layer.0": 658.5942, "encoder_q-layer.1": 748.1573, "encoder_q-layer.10": 1379.2896, "encoder_q-layer.11": 3103.9485, "encoder_q-layer.2": 850.8661, "encoder_q-layer.3": 903.9914, "encoder_q-layer.4": 989.3514, "encoder_q-layer.5": 1097.3131, "encoder_q-layer.6": 1169.6438, "encoder_q-layer.7": 1334.142, "encoder_q-layer.8": 1497.0975, "encoder_q-layer.9": 1292.0065, "epoch": 0.91, "inbatch_neg_score": 0.5986, "inbatch_pos_score": 1.291, "learning_rate": 3.722222222222222e-06, "loss": 3.0187, "norm_diff": 0.0849, "norm_loss": 0.0, "num_token_doc": 66.9031, "num_token_overlap": 15.8645, "num_token_query": 42.2827, "num_token_union": 68.5011, "num_word_context": 202.1444, "num_word_doc": 49.9416, "num_word_query": 31.907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2061.2961, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5981, "query_norm": 1.4767, "queue_k_norm": 1.5628, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2827, "sent_len_1": 66.9031, "sent_len_max_0": 128.0, "sent_len_max_1": 189.205, "stdk": 0.0488, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0449, "doc_norm": 1.5624, "encoder_q-embeddings": 973.8813, "encoder_q-layer.0": 683.7487, "encoder_q-layer.1": 733.7744, "encoder_q-layer.10": 1231.4937, "encoder_q-layer.11": 2974.2358, "encoder_q-layer.2": 867.3885, "encoder_q-layer.3": 928.0517, "encoder_q-layer.4": 978.4991, "encoder_q-layer.5": 1001.4821, "encoder_q-layer.6": 1071.8376, "encoder_q-layer.7": 1222.3679, "encoder_q-layer.8": 1308.7014, "encoder_q-layer.9": 1145.2754, "epoch": 0.91, "inbatch_neg_score": 0.6015, "inbatch_pos_score": 1.2871, "learning_rate": 3.666666666666667e-06, "loss": 3.0449, "norm_diff": 0.0969, "norm_loss": 0.0, "num_token_doc": 67.0162, "num_token_overlap": 15.8023, "num_token_query": 42.2948, "num_token_union": 68.6094, "num_word_context": 202.357, "num_word_doc": 50.0311, "num_word_query": 31.9574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1969.4927, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6016, "query_norm": 1.4655, "queue_k_norm": 1.5623, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2948, "sent_len_1": 67.0162, "sent_len_max_0": 127.9825, "sent_len_max_1": 188.3162, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 56.3477, "active_queue_size": 16384.0, "cl_loss": 3.0342, "doc_norm": 1.5652, "encoder_q-embeddings": 1073.7686, "encoder_q-layer.0": 704.7311, "encoder_q-layer.1": 749.823, "encoder_q-layer.10": 1317.4409, "encoder_q-layer.11": 3194.4746, "encoder_q-layer.2": 842.79, "encoder_q-layer.3": 873.008, "encoder_q-layer.4": 918.8187, "encoder_q-layer.5": 982.369, "encoder_q-layer.6": 1088.5281, "encoder_q-layer.7": 1133.0469, "encoder_q-layer.8": 1295.0216, "encoder_q-layer.9": 1176.7321, "epoch": 0.91, "inbatch_neg_score": 0.6014, "inbatch_pos_score": 1.2939, "learning_rate": 3.611111111111111e-06, "loss": 3.0342, "norm_diff": 0.0938, "norm_loss": 0.0, "num_token_doc": 66.9114, "num_token_overlap": 15.838, "num_token_query": 42.438, "num_token_union": 68.6421, "num_word_context": 202.6405, "num_word_doc": 49.9414, "num_word_query": 32.0571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2044.286, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6021, "query_norm": 1.4714, "queue_k_norm": 1.5644, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.438, "sent_len_1": 66.9114, "sent_len_max_0": 127.9862, "sent_len_max_1": 190.67, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0491, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0391, "doc_norm": 1.5685, "encoder_q-embeddings": 928.5717, "encoder_q-layer.0": 607.4429, "encoder_q-layer.1": 634.8022, "encoder_q-layer.10": 1264.317, "encoder_q-layer.11": 2954.9314, "encoder_q-layer.2": 712.9799, "encoder_q-layer.3": 748.9336, "encoder_q-layer.4": 781.9185, "encoder_q-layer.5": 870.5689, "encoder_q-layer.6": 1016.907, "encoder_q-layer.7": 1156.1836, "encoder_q-layer.8": 1366.9274, "encoder_q-layer.9": 1208.9198, "epoch": 0.91, "inbatch_neg_score": 0.6014, "inbatch_pos_score": 1.293, "learning_rate": 3.555555555555556e-06, "loss": 3.0391, "norm_diff": 0.1003, "norm_loss": 0.0, "num_token_doc": 66.7871, "num_token_overlap": 15.8224, "num_token_query": 42.4346, "num_token_union": 68.5448, "num_word_context": 202.3884, "num_word_doc": 49.7771, "num_word_query": 32.0459, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1920.3776, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6006, "query_norm": 1.4682, "queue_k_norm": 1.563, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4346, "sent_len_1": 66.7871, "sent_len_max_0": 128.0, "sent_len_max_1": 192.035, "stdk": 0.0491, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0442, "doc_norm": 1.5672, "encoder_q-embeddings": 3580.0498, "encoder_q-layer.0": 2471.7302, "encoder_q-layer.1": 2535.9055, "encoder_q-layer.10": 1274.0045, "encoder_q-layer.11": 2867.0894, "encoder_q-layer.2": 3198.3533, "encoder_q-layer.3": 3268.8806, "encoder_q-layer.4": 3502.3574, "encoder_q-layer.5": 3731.0706, "encoder_q-layer.6": 3538.1226, "encoder_q-layer.7": 2844.1963, "encoder_q-layer.8": 2082.3821, "encoder_q-layer.9": 1165.4393, "epoch": 0.91, "inbatch_neg_score": 0.5989, "inbatch_pos_score": 1.3115, "learning_rate": 3.5000000000000004e-06, "loss": 3.0442, "norm_diff": 0.0965, "norm_loss": 0.0, "num_token_doc": 66.875, "num_token_overlap": 15.8645, "num_token_query": 42.4369, "num_token_union": 68.564, "num_word_context": 202.5695, "num_word_doc": 49.9078, "num_word_query": 32.0438, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4389.0664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6006, "query_norm": 1.4707, "queue_k_norm": 1.5611, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4369, "sent_len_1": 66.875, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.2775, "stdk": 0.0491, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0307, "doc_norm": 1.5653, "encoder_q-embeddings": 1031.9741, "encoder_q-layer.0": 734.2381, "encoder_q-layer.1": 795.7609, "encoder_q-layer.10": 1288.1458, "encoder_q-layer.11": 3176.4885, "encoder_q-layer.2": 919.8535, "encoder_q-layer.3": 982.6826, "encoder_q-layer.4": 1045.3071, "encoder_q-layer.5": 1076.7938, "encoder_q-layer.6": 1177.8304, "encoder_q-layer.7": 1208.6335, "encoder_q-layer.8": 1364.7532, "encoder_q-layer.9": 1212.5444, "epoch": 0.92, "inbatch_neg_score": 0.6031, "inbatch_pos_score": 1.2998, "learning_rate": 3.4444444444444444e-06, "loss": 3.0307, "norm_diff": 0.0907, "norm_loss": 0.0, "num_token_doc": 67.0818, "num_token_overlap": 15.8752, "num_token_query": 42.4466, "num_token_union": 68.684, "num_word_context": 202.8166, "num_word_doc": 50.0387, "num_word_query": 32.0864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2118.2649, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.604, "query_norm": 1.4746, "queue_k_norm": 1.5631, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4466, "sent_len_1": 67.0818, "sent_len_max_0": 127.9988, "sent_len_max_1": 190.755, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0628, "doc_norm": 1.5634, "encoder_q-embeddings": 1065.6097, "encoder_q-layer.0": 725.7621, "encoder_q-layer.1": 768.2624, "encoder_q-layer.10": 1370.9878, "encoder_q-layer.11": 2978.5142, "encoder_q-layer.2": 921.206, "encoder_q-layer.3": 945.0547, "encoder_q-layer.4": 967.6751, "encoder_q-layer.5": 1043.0548, "encoder_q-layer.6": 1205.0269, "encoder_q-layer.7": 1236.6996, "encoder_q-layer.8": 1329.4231, "encoder_q-layer.9": 1224.4081, "epoch": 0.92, "inbatch_neg_score": 0.6009, "inbatch_pos_score": 1.2861, "learning_rate": 3.3888888888888893e-06, "loss": 3.0628, "norm_diff": 0.0982, "norm_loss": 0.0, "num_token_doc": 66.7174, "num_token_overlap": 15.731, "num_token_query": 42.291, "num_token_union": 68.4941, "num_word_context": 202.4307, "num_word_doc": 49.7716, "num_word_query": 31.9554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2045.3713, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6025, "query_norm": 1.4651, "queue_k_norm": 1.5624, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.291, "sent_len_1": 66.7174, "sent_len_max_0": 128.0, "sent_len_max_1": 189.1637, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0422, "doc_norm": 1.5608, "encoder_q-embeddings": 2643.8018, "encoder_q-layer.0": 1855.0483, "encoder_q-layer.1": 2170.4568, "encoder_q-layer.10": 1268.0898, "encoder_q-layer.11": 3000.9136, "encoder_q-layer.2": 2787.3467, "encoder_q-layer.3": 3097.2988, "encoder_q-layer.4": 3387.6523, "encoder_q-layer.5": 3496.6526, "encoder_q-layer.6": 3683.896, "encoder_q-layer.7": 3684.1377, "encoder_q-layer.8": 2839.9961, "encoder_q-layer.9": 1854.1288, "epoch": 0.92, "inbatch_neg_score": 0.603, "inbatch_pos_score": 1.3018, "learning_rate": 3.3333333333333333e-06, "loss": 3.0422, "norm_diff": 0.0938, "norm_loss": 0.0, "num_token_doc": 66.7828, "num_token_overlap": 15.8314, "num_token_query": 42.3394, "num_token_union": 68.4868, "num_word_context": 202.0954, "num_word_doc": 49.7928, "num_word_query": 31.9885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4242.9366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6025, "query_norm": 1.4669, "queue_k_norm": 1.5631, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3394, "sent_len_1": 66.7828, "sent_len_max_0": 127.9762, "sent_len_max_1": 191.41, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 3.0569, "doc_norm": 1.5599, "encoder_q-embeddings": 960.7365, "encoder_q-layer.0": 631.9084, "encoder_q-layer.1": 680.4271, "encoder_q-layer.10": 1198.3759, "encoder_q-layer.11": 2880.3286, "encoder_q-layer.2": 758.3712, "encoder_q-layer.3": 783.3414, "encoder_q-layer.4": 874.0916, "encoder_q-layer.5": 901.7158, "encoder_q-layer.6": 1053.0945, "encoder_q-layer.7": 1233.9351, "encoder_q-layer.8": 1336.2266, "encoder_q-layer.9": 1139.443, "epoch": 0.92, "inbatch_neg_score": 0.603, "inbatch_pos_score": 1.2969, "learning_rate": 3.277777777777778e-06, "loss": 3.0569, "norm_diff": 0.0983, "norm_loss": 0.0, "num_token_doc": 66.8929, "num_token_overlap": 15.7956, "num_token_query": 42.2923, "num_token_union": 68.5492, "num_word_context": 202.5263, "num_word_doc": 49.8985, "num_word_query": 31.9411, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1908.1512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6025, "query_norm": 1.4615, "queue_k_norm": 1.5632, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2923, "sent_len_1": 66.8929, "sent_len_max_0": 127.9775, "sent_len_max_1": 190.2937, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0375, "doc_norm": 1.5667, "encoder_q-embeddings": 1026.9651, "encoder_q-layer.0": 675.3099, "encoder_q-layer.1": 735.5456, "encoder_q-layer.10": 1251.6608, "encoder_q-layer.11": 2950.5164, "encoder_q-layer.2": 860.6136, "encoder_q-layer.3": 883.1997, "encoder_q-layer.4": 940.3775, "encoder_q-layer.5": 945.6107, "encoder_q-layer.6": 1035.2979, "encoder_q-layer.7": 1233.2131, "encoder_q-layer.8": 1369.7244, "encoder_q-layer.9": 1175.519, "epoch": 0.92, "inbatch_neg_score": 0.6019, "inbatch_pos_score": 1.29, "learning_rate": 3.2222222222222222e-06, "loss": 3.0375, "norm_diff": 0.088, "norm_loss": 0.0, "num_token_doc": 66.78, "num_token_overlap": 15.8883, "num_token_query": 42.4214, "num_token_union": 68.4679, "num_word_context": 202.3926, "num_word_doc": 49.8318, "num_word_query": 32.0471, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1977.3646, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6016, "query_norm": 1.4787, "queue_k_norm": 1.5624, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4214, "sent_len_1": 66.78, "sent_len_max_0": 128.0, "sent_len_max_1": 189.61, "stdk": 0.049, "stdq": 0.0455, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 58.5938, "active_queue_size": 16384.0, "cl_loss": 3.0428, "doc_norm": 1.5615, "encoder_q-embeddings": 1067.5396, "encoder_q-layer.0": 684.8088, "encoder_q-layer.1": 755.4059, "encoder_q-layer.10": 1212.797, "encoder_q-layer.11": 2901.2581, "encoder_q-layer.2": 902.6903, "encoder_q-layer.3": 952.3929, "encoder_q-layer.4": 992.0082, "encoder_q-layer.5": 1004.3691, "encoder_q-layer.6": 1096.4556, "encoder_q-layer.7": 1144.7844, "encoder_q-layer.8": 1261.8448, "encoder_q-layer.9": 1163.9541, "epoch": 0.92, "inbatch_neg_score": 0.6028, "inbatch_pos_score": 1.3184, "learning_rate": 3.166666666666667e-06, "loss": 3.0428, "norm_diff": 0.0857, "norm_loss": 0.0, "num_token_doc": 66.7954, "num_token_overlap": 15.7885, "num_token_query": 42.2981, "num_token_union": 68.5281, "num_word_context": 202.6353, "num_word_doc": 49.7951, "num_word_query": 31.9388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1971.4643, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6025, "query_norm": 1.4757, "queue_k_norm": 1.5631, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.2981, "sent_len_1": 66.7954, "sent_len_max_0": 128.0, "sent_len_max_1": 191.17, "stdk": 0.0488, "stdq": 0.0454, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0416, "doc_norm": 1.5611, "encoder_q-embeddings": 1003.809, "encoder_q-layer.0": 651.6232, "encoder_q-layer.1": 675.8758, "encoder_q-layer.10": 1304.6183, "encoder_q-layer.11": 2962.7585, "encoder_q-layer.2": 756.9986, "encoder_q-layer.3": 797.2397, "encoder_q-layer.4": 839.9636, "encoder_q-layer.5": 855.4625, "encoder_q-layer.6": 1031.7369, "encoder_q-layer.7": 1189.9493, "encoder_q-layer.8": 1296.0496, "encoder_q-layer.9": 1143.9485, "epoch": 0.92, "inbatch_neg_score": 0.606, "inbatch_pos_score": 1.3086, "learning_rate": 3.111111111111111e-06, "loss": 3.0416, "norm_diff": 0.0987, "norm_loss": 0.0, "num_token_doc": 66.7235, "num_token_overlap": 15.8123, "num_token_query": 42.281, "num_token_union": 68.3853, "num_word_context": 202.1314, "num_word_doc": 49.7737, "num_word_query": 31.9389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1972.2242, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6055, "query_norm": 1.4624, "queue_k_norm": 1.562, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.281, "sent_len_1": 66.7235, "sent_len_max_0": 128.0, "sent_len_max_1": 188.935, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 56.543, "active_queue_size": 16384.0, "cl_loss": 3.0333, "doc_norm": 1.5584, "encoder_q-embeddings": 974.8455, "encoder_q-layer.0": 658.9481, "encoder_q-layer.1": 721.806, "encoder_q-layer.10": 1282.7778, "encoder_q-layer.11": 3064.1873, "encoder_q-layer.2": 798.3969, "encoder_q-layer.3": 834.5145, "encoder_q-layer.4": 846.2782, "encoder_q-layer.5": 889.2875, "encoder_q-layer.6": 968.4593, "encoder_q-layer.7": 1103.0575, "encoder_q-layer.8": 1270.6732, "encoder_q-layer.9": 1203.0725, "epoch": 0.92, "inbatch_neg_score": 0.6054, "inbatch_pos_score": 1.2852, "learning_rate": 3.0555555555555556e-06, "loss": 3.0333, "norm_diff": 0.0944, "norm_loss": 0.0, "num_token_doc": 66.9826, "num_token_overlap": 15.8176, "num_token_query": 42.3432, "num_token_union": 68.5879, "num_word_context": 202.6404, "num_word_doc": 49.9608, "num_word_query": 31.9924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1983.3279, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6045, "query_norm": 1.464, "queue_k_norm": 1.5648, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3432, "sent_len_1": 66.9826, "sent_len_max_0": 127.99, "sent_len_max_1": 191.0938, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0317, "doc_norm": 1.5622, "encoder_q-embeddings": 1113.4408, "encoder_q-layer.0": 738.9637, "encoder_q-layer.1": 821.138, "encoder_q-layer.10": 1331.1272, "encoder_q-layer.11": 3024.8301, "encoder_q-layer.2": 925.4664, "encoder_q-layer.3": 1007.0385, "encoder_q-layer.4": 1077.8485, "encoder_q-layer.5": 1078.7394, "encoder_q-layer.6": 1165.0026, "encoder_q-layer.7": 1262.2916, "encoder_q-layer.8": 1394.7812, "encoder_q-layer.9": 1199.9918, "epoch": 0.92, "inbatch_neg_score": 0.6069, "inbatch_pos_score": 1.3086, "learning_rate": 3e-06, "loss": 3.0317, "norm_diff": 0.0795, "norm_loss": 0.0, "num_token_doc": 66.923, "num_token_overlap": 15.8601, "num_token_query": 42.4689, "num_token_union": 68.6303, "num_word_context": 202.5549, "num_word_doc": 49.9286, "num_word_query": 32.0914, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2108.9656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6079, "query_norm": 1.4826, "queue_k_norm": 1.5641, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4689, "sent_len_1": 66.923, "sent_len_max_0": 127.9988, "sent_len_max_1": 191.5863, "stdk": 0.0489, "stdq": 0.0456, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.05, "doc_norm": 1.5604, "encoder_q-embeddings": 541.8759, "encoder_q-layer.0": 364.3741, "encoder_q-layer.1": 383.5341, "encoder_q-layer.10": 719.538, "encoder_q-layer.11": 1631.6226, "encoder_q-layer.2": 435.8633, "encoder_q-layer.3": 467.6327, "encoder_q-layer.4": 486.2246, "encoder_q-layer.5": 505.9677, "encoder_q-layer.6": 557.2172, "encoder_q-layer.7": 638.2141, "encoder_q-layer.8": 685.4444, "encoder_q-layer.9": 643.5979, "epoch": 0.92, "inbatch_neg_score": 0.6054, "inbatch_pos_score": 1.2842, "learning_rate": 2.9444444444444445e-06, "loss": 3.05, "norm_diff": 0.0927, "norm_loss": 0.0, "num_token_doc": 66.8346, "num_token_overlap": 15.83, "num_token_query": 42.4221, "num_token_union": 68.5896, "num_word_context": 202.2519, "num_word_doc": 49.8721, "num_word_query": 32.0475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1065.4098, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.606, "query_norm": 1.4677, "queue_k_norm": 1.5638, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4221, "sent_len_1": 66.8346, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.8288, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 55.1758, "active_queue_size": 16384.0, "cl_loss": 3.0299, "doc_norm": 1.5651, "encoder_q-embeddings": 688.5753, "encoder_q-layer.0": 446.8614, "encoder_q-layer.1": 482.0374, "encoder_q-layer.10": 687.2285, "encoder_q-layer.11": 1559.8533, "encoder_q-layer.2": 539.4053, "encoder_q-layer.3": 583.7746, "encoder_q-layer.4": 622.2786, "encoder_q-layer.5": 657.1835, "encoder_q-layer.6": 704.419, "encoder_q-layer.7": 785.9974, "encoder_q-layer.8": 800.1325, "encoder_q-layer.9": 682.76, "epoch": 0.93, "inbatch_neg_score": 0.607, "inbatch_pos_score": 1.3027, "learning_rate": 2.888888888888889e-06, "loss": 3.0299, "norm_diff": 0.094, "norm_loss": 0.0, "num_token_doc": 66.6479, "num_token_overlap": 15.8089, "num_token_query": 42.4764, "num_token_union": 68.5052, "num_word_context": 202.2698, "num_word_doc": 49.7322, "num_word_query": 32.0891, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1171.1921, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6069, "query_norm": 1.4711, "queue_k_norm": 1.5636, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4764, "sent_len_1": 66.6479, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.635, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 55.957, "active_queue_size": 16384.0, "cl_loss": 3.0535, "doc_norm": 1.5662, "encoder_q-embeddings": 523.9991, "encoder_q-layer.0": 349.1474, "encoder_q-layer.1": 380.5583, "encoder_q-layer.10": 685.3932, "encoder_q-layer.11": 1545.1758, "encoder_q-layer.2": 429.3926, "encoder_q-layer.3": 433.398, "encoder_q-layer.4": 471.4445, "encoder_q-layer.5": 498.0715, "encoder_q-layer.6": 549.5099, "encoder_q-layer.7": 603.7409, "encoder_q-layer.8": 700.9126, "encoder_q-layer.9": 622.4579, "epoch": 0.93, "inbatch_neg_score": 0.6094, "inbatch_pos_score": 1.293, "learning_rate": 2.8333333333333335e-06, "loss": 3.0535, "norm_diff": 0.1012, "norm_loss": 0.0, "num_token_doc": 66.6161, "num_token_overlap": 15.7674, "num_token_query": 42.2694, "num_token_union": 68.4075, "num_word_context": 202.3672, "num_word_doc": 49.6988, "num_word_query": 31.9243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1040.3804, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6074, "query_norm": 1.465, "queue_k_norm": 1.5642, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2694, "sent_len_1": 66.6161, "sent_len_max_0": 127.9988, "sent_len_max_1": 189.0838, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 55.7617, "active_queue_size": 16384.0, "cl_loss": 3.0348, "doc_norm": 1.5641, "encoder_q-embeddings": 711.4889, "encoder_q-layer.0": 481.9753, "encoder_q-layer.1": 560.382, "encoder_q-layer.10": 613.4969, "encoder_q-layer.11": 1547.924, "encoder_q-layer.2": 649.1453, "encoder_q-layer.3": 688.3227, "encoder_q-layer.4": 734.1222, "encoder_q-layer.5": 801.8937, "encoder_q-layer.6": 804.5307, "encoder_q-layer.7": 761.7314, "encoder_q-layer.8": 724.9531, "encoder_q-layer.9": 612.0043, "epoch": 0.93, "inbatch_neg_score": 0.609, "inbatch_pos_score": 1.291, "learning_rate": 2.777777777777778e-06, "loss": 3.0348, "norm_diff": 0.0959, "norm_loss": 0.0, "num_token_doc": 66.8786, "num_token_overlap": 15.7952, "num_token_query": 42.3597, "num_token_union": 68.6027, "num_word_context": 202.5911, "num_word_doc": 49.8696, "num_word_query": 31.9931, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1207.1086, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6084, "query_norm": 1.4682, "queue_k_norm": 1.5644, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3597, "sent_len_1": 66.8786, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.55, "stdk": 0.0489, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 53.6133, "active_queue_size": 16384.0, "cl_loss": 3.0615, "doc_norm": 1.5619, "encoder_q-embeddings": 673.6417, "encoder_q-layer.0": 458.7163, "encoder_q-layer.1": 513.0806, "encoder_q-layer.10": 709.3831, "encoder_q-layer.11": 1546.8661, "encoder_q-layer.2": 621.4218, "encoder_q-layer.3": 679.5929, "encoder_q-layer.4": 749.7341, "encoder_q-layer.5": 777.6772, "encoder_q-layer.6": 773.231, "encoder_q-layer.7": 862.7292, "encoder_q-layer.8": 809.8718, "encoder_q-layer.9": 628.6589, "epoch": 0.93, "inbatch_neg_score": 0.6079, "inbatch_pos_score": 1.2744, "learning_rate": 2.7222222222222224e-06, "loss": 3.0615, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.8486, "num_token_overlap": 15.7179, "num_token_query": 42.1041, "num_token_union": 68.4297, "num_word_context": 202.8286, "num_word_doc": 49.8734, "num_word_query": 31.7854, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1198.6754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6074, "query_norm": 1.4593, "queue_k_norm": 1.5636, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1041, "sent_len_1": 66.8486, "sent_len_max_0": 128.0, "sent_len_max_1": 192.025, "stdk": 0.0488, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 58.3008, "active_queue_size": 16384.0, "cl_loss": 3.0551, "doc_norm": 1.5607, "encoder_q-embeddings": 1176.8146, "encoder_q-layer.0": 813.5762, "encoder_q-layer.1": 886.2776, "encoder_q-layer.10": 635.4832, "encoder_q-layer.11": 1509.9747, "encoder_q-layer.2": 1096.9475, "encoder_q-layer.3": 1121.4644, "encoder_q-layer.4": 1161.2629, "encoder_q-layer.5": 1164.9277, "encoder_q-layer.6": 1083.2968, "encoder_q-layer.7": 966.9187, "encoder_q-layer.8": 959.298, "encoder_q-layer.9": 663.908, "epoch": 0.93, "inbatch_neg_score": 0.6097, "inbatch_pos_score": 1.3096, "learning_rate": 2.666666666666667e-06, "loss": 3.0551, "norm_diff": 0.0926, "norm_loss": 0.0, "num_token_doc": 66.4927, "num_token_overlap": 15.7418, "num_token_query": 42.214, "num_token_union": 68.3279, "num_word_context": 202.2082, "num_word_doc": 49.6483, "num_word_query": 31.8794, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1589.6008, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 1.4681, "queue_k_norm": 1.5625, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.214, "sent_len_1": 66.4927, "sent_len_max_0": 127.995, "sent_len_max_1": 188.4412, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0286, "doc_norm": 1.5658, "encoder_q-embeddings": 602.6213, "encoder_q-layer.0": 377.3961, "encoder_q-layer.1": 427.0999, "encoder_q-layer.10": 593.8774, "encoder_q-layer.11": 1485.9738, "encoder_q-layer.2": 503.2663, "encoder_q-layer.3": 523.2444, "encoder_q-layer.4": 554.0759, "encoder_q-layer.5": 582.6758, "encoder_q-layer.6": 598.69, "encoder_q-layer.7": 677.1364, "encoder_q-layer.8": 743.4786, "encoder_q-layer.9": 629.7186, "epoch": 0.93, "inbatch_neg_score": 0.6076, "inbatch_pos_score": 1.2969, "learning_rate": 2.6111111111111113e-06, "loss": 3.0286, "norm_diff": 0.0911, "norm_loss": 0.0, "num_token_doc": 66.7514, "num_token_overlap": 15.8079, "num_token_query": 42.1832, "num_token_union": 68.4276, "num_word_context": 202.3179, "num_word_doc": 49.8481, "num_word_query": 31.8851, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1074.3921, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6079, "query_norm": 1.4747, "queue_k_norm": 1.5653, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1832, "sent_len_1": 66.7514, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.6213, "stdk": 0.049, "stdq": 0.0453, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.0594, "doc_norm": 1.5679, "encoder_q-embeddings": 1282.7764, "encoder_q-layer.0": 871.419, "encoder_q-layer.1": 1009.899, "encoder_q-layer.10": 613.5305, "encoder_q-layer.11": 1477.6964, "encoder_q-layer.2": 1144.1606, "encoder_q-layer.3": 1248.8177, "encoder_q-layer.4": 1344.7166, "encoder_q-layer.5": 1742.0304, "encoder_q-layer.6": 1559.6685, "encoder_q-layer.7": 1571.7379, "encoder_q-layer.8": 1380.4484, "encoder_q-layer.9": 682.6876, "epoch": 0.93, "inbatch_neg_score": 0.6086, "inbatch_pos_score": 1.29, "learning_rate": 2.5555555555555557e-06, "loss": 3.0594, "norm_diff": 0.099, "norm_loss": 0.0, "num_token_doc": 66.7324, "num_token_overlap": 15.8038, "num_token_query": 42.1862, "num_token_union": 68.4109, "num_word_context": 202.3577, "num_word_doc": 49.7922, "num_word_query": 31.8451, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1901.8066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6079, "query_norm": 1.4689, "queue_k_norm": 1.5653, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1862, "sent_len_1": 66.7324, "sent_len_max_0": 128.0, "sent_len_max_1": 190.2337, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 57.3242, "active_queue_size": 16384.0, "cl_loss": 3.041, "doc_norm": 1.562, "encoder_q-embeddings": 691.6674, "encoder_q-layer.0": 470.2369, "encoder_q-layer.1": 503.834, "encoder_q-layer.10": 621.9881, "encoder_q-layer.11": 1422.1566, "encoder_q-layer.2": 573.8264, "encoder_q-layer.3": 607.379, "encoder_q-layer.4": 647.4727, "encoder_q-layer.5": 647.6226, "encoder_q-layer.6": 682.4751, "encoder_q-layer.7": 704.9606, "encoder_q-layer.8": 683.8405, "encoder_q-layer.9": 588.8935, "epoch": 0.93, "inbatch_neg_score": 0.6078, "inbatch_pos_score": 1.2871, "learning_rate": 2.5e-06, "loss": 3.041, "norm_diff": 0.1, "norm_loss": 0.0, "num_token_doc": 66.8786, "num_token_overlap": 15.8599, "num_token_query": 42.289, "num_token_union": 68.4839, "num_word_context": 202.2562, "num_word_doc": 49.904, "num_word_query": 31.938, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1086.0203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6079, "query_norm": 1.462, "queue_k_norm": 1.565, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.289, "sent_len_1": 66.8786, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8325, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 59.668, "active_queue_size": 16384.0, "cl_loss": 3.0375, "doc_norm": 1.5659, "encoder_q-embeddings": 463.1081, "encoder_q-layer.0": 309.9196, "encoder_q-layer.1": 328.7865, "encoder_q-layer.10": 653.2844, "encoder_q-layer.11": 1502.7352, "encoder_q-layer.2": 381.8225, "encoder_q-layer.3": 421.7477, "encoder_q-layer.4": 432.8522, "encoder_q-layer.5": 463.1743, "encoder_q-layer.6": 521.2432, "encoder_q-layer.7": 594.9833, "encoder_q-layer.8": 708.8734, "encoder_q-layer.9": 613.1304, "epoch": 0.93, "inbatch_neg_score": 0.6101, "inbatch_pos_score": 1.3242, "learning_rate": 2.4444444444444447e-06, "loss": 3.0375, "norm_diff": 0.1019, "norm_loss": 0.0, "num_token_doc": 66.8937, "num_token_overlap": 15.8885, "num_token_query": 42.3738, "num_token_union": 68.5139, "num_word_context": 202.4015, "num_word_doc": 49.9572, "num_word_query": 32.0212, "postclip_grad_norm": 1.0, "preclip_grad_norm": 982.0328, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 1.464, "queue_k_norm": 1.5633, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3738, "sent_len_1": 66.8937, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7925, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0126, "doc_norm": 1.5573, "encoder_q-embeddings": 481.0154, "encoder_q-layer.0": 315.6112, "encoder_q-layer.1": 331.5907, "encoder_q-layer.10": 618.7515, "encoder_q-layer.11": 1539.3739, "encoder_q-layer.2": 381.1147, "encoder_q-layer.3": 405.6801, "encoder_q-layer.4": 414.3152, "encoder_q-layer.5": 437.743, "encoder_q-layer.6": 481.2928, "encoder_q-layer.7": 546.7527, "encoder_q-layer.8": 640.4333, "encoder_q-layer.9": 578.3639, "epoch": 0.93, "inbatch_neg_score": 0.6072, "inbatch_pos_score": 1.3105, "learning_rate": 2.388888888888889e-06, "loss": 3.0126, "norm_diff": 0.0873, "norm_loss": 0.0, "num_token_doc": 67.1194, "num_token_overlap": 15.9275, "num_token_query": 42.4611, "num_token_union": 68.7453, "num_word_context": 202.6225, "num_word_doc": 50.0826, "num_word_query": 32.0886, "postclip_grad_norm": 1.0, "preclip_grad_norm": 973.5249, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6069, "query_norm": 1.47, "queue_k_norm": 1.5649, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4611, "sent_len_1": 67.1194, "sent_len_max_0": 128.0, "sent_len_max_1": 191.5925, "stdk": 0.0486, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0268, "doc_norm": 1.557, "encoder_q-embeddings": 522.0745, "encoder_q-layer.0": 348.1015, "encoder_q-layer.1": 391.4974, "encoder_q-layer.10": 611.5246, "encoder_q-layer.11": 1495.5822, "encoder_q-layer.2": 426.0109, "encoder_q-layer.3": 448.7555, "encoder_q-layer.4": 493.1637, "encoder_q-layer.5": 530.8077, "encoder_q-layer.6": 561.8794, "encoder_q-layer.7": 576.4645, "encoder_q-layer.8": 638.5078, "encoder_q-layer.9": 580.5392, "epoch": 0.94, "inbatch_neg_score": 0.6083, "inbatch_pos_score": 1.2812, "learning_rate": 2.3333333333333336e-06, "loss": 3.0268, "norm_diff": 0.0962, "norm_loss": 0.0, "num_token_doc": 66.9962, "num_token_overlap": 15.9322, "num_token_query": 42.5802, "num_token_union": 68.6367, "num_word_context": 202.6777, "num_word_doc": 49.9842, "num_word_query": 32.1456, "postclip_grad_norm": 1.0, "preclip_grad_norm": 996.5224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6089, "query_norm": 1.4608, "queue_k_norm": 1.5635, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5802, "sent_len_1": 66.9962, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.6387, "stdk": 0.0486, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.0344, "doc_norm": 1.5654, "encoder_q-embeddings": 696.8029, "encoder_q-layer.0": 497.7283, "encoder_q-layer.1": 554.0085, "encoder_q-layer.10": 612.6675, "encoder_q-layer.11": 1501.5221, "encoder_q-layer.2": 673.7976, "encoder_q-layer.3": 728.0666, "encoder_q-layer.4": 760.978, "encoder_q-layer.5": 738.5276, "encoder_q-layer.6": 842.8792, "encoder_q-layer.7": 838.3427, "encoder_q-layer.8": 834.7423, "encoder_q-layer.9": 663.4241, "epoch": 0.94, "inbatch_neg_score": 0.6076, "inbatch_pos_score": 1.2988, "learning_rate": 2.277777777777778e-06, "loss": 3.0344, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 66.6926, "num_token_overlap": 15.8677, "num_token_query": 42.3271, "num_token_union": 68.379, "num_word_context": 202.0095, "num_word_doc": 49.7601, "num_word_query": 31.9609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1225.9962, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6079, "query_norm": 1.4702, "queue_k_norm": 1.5646, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3271, "sent_len_1": 66.6926, "sent_len_max_0": 128.0, "sent_len_max_1": 188.7075, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 56.0547, "active_queue_size": 16384.0, "cl_loss": 3.0416, "doc_norm": 1.5664, "encoder_q-embeddings": 520.1377, "encoder_q-layer.0": 348.3733, "encoder_q-layer.1": 364.4706, "encoder_q-layer.10": 617.4799, "encoder_q-layer.11": 1527.7944, "encoder_q-layer.2": 416.4649, "encoder_q-layer.3": 439.4614, "encoder_q-layer.4": 444.9095, "encoder_q-layer.5": 478.3162, "encoder_q-layer.6": 557.0377, "encoder_q-layer.7": 650.5152, "encoder_q-layer.8": 673.6641, "encoder_q-layer.9": 594.4409, "epoch": 0.94, "inbatch_neg_score": 0.6072, "inbatch_pos_score": 1.2832, "learning_rate": 2.2222222222222225e-06, "loss": 3.0416, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.6231, "num_token_overlap": 15.8695, "num_token_query": 42.4492, "num_token_union": 68.4855, "num_word_context": 202.2939, "num_word_doc": 49.7119, "num_word_query": 32.0733, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1019.0941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6074, "query_norm": 1.4639, "queue_k_norm": 1.5644, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4492, "sent_len_1": 66.6231, "sent_len_max_0": 127.9938, "sent_len_max_1": 189.43, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.058, "doc_norm": 1.5609, "encoder_q-embeddings": 1071.5148, "encoder_q-layer.0": 769.427, "encoder_q-layer.1": 875.0229, "encoder_q-layer.10": 613.5256, "encoder_q-layer.11": 1526.5992, "encoder_q-layer.2": 981.7695, "encoder_q-layer.3": 1006.9734, "encoder_q-layer.4": 959.9028, "encoder_q-layer.5": 887.6206, "encoder_q-layer.6": 898.576, "encoder_q-layer.7": 897.0986, "encoder_q-layer.8": 900.0443, "encoder_q-layer.9": 660.7686, "epoch": 0.94, "inbatch_neg_score": 0.6097, "inbatch_pos_score": 1.3066, "learning_rate": 2.166666666666667e-06, "loss": 3.058, "norm_diff": 0.0896, "norm_loss": 0.0, "num_token_doc": 66.7547, "num_token_overlap": 15.8629, "num_token_query": 42.4306, "num_token_union": 68.5207, "num_word_context": 202.6943, "num_word_doc": 49.8611, "num_word_query": 32.0735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1459.4624, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6099, "query_norm": 1.4714, "queue_k_norm": 1.5629, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4306, "sent_len_1": 66.7547, "sent_len_max_0": 127.995, "sent_len_max_1": 191.4688, "stdk": 0.0487, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 54.3945, "active_queue_size": 16384.0, "cl_loss": 3.0408, "doc_norm": 1.5595, "encoder_q-embeddings": 974.6711, "encoder_q-layer.0": 673.9365, "encoder_q-layer.1": 717.5356, "encoder_q-layer.10": 607.4928, "encoder_q-layer.11": 1522.7935, "encoder_q-layer.2": 804.0683, "encoder_q-layer.3": 848.7732, "encoder_q-layer.4": 904.7797, "encoder_q-layer.5": 923.7558, "encoder_q-layer.6": 795.7336, "encoder_q-layer.7": 820.335, "encoder_q-layer.8": 705.1282, "encoder_q-layer.9": 591.3356, "epoch": 0.94, "inbatch_neg_score": 0.6104, "inbatch_pos_score": 1.2861, "learning_rate": 2.1111111111111114e-06, "loss": 3.0408, "norm_diff": 0.0982, "norm_loss": 0.0, "num_token_doc": 66.7457, "num_token_overlap": 15.8862, "num_token_query": 42.4591, "num_token_union": 68.5354, "num_word_context": 202.1405, "num_word_doc": 49.8083, "num_word_query": 32.074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1341.2973, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6099, "query_norm": 1.4613, "queue_k_norm": 1.5631, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4591, "sent_len_1": 66.7457, "sent_len_max_0": 127.9963, "sent_len_max_1": 188.1012, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 56.9336, "active_queue_size": 16384.0, "cl_loss": 3.0419, "doc_norm": 1.5653, "encoder_q-embeddings": 983.1079, "encoder_q-layer.0": 634.5114, "encoder_q-layer.1": 732.9412, "encoder_q-layer.10": 642.3027, "encoder_q-layer.11": 1504.6449, "encoder_q-layer.2": 924.7767, "encoder_q-layer.3": 990.6069, "encoder_q-layer.4": 1043.1748, "encoder_q-layer.5": 1120.8656, "encoder_q-layer.6": 1069.6638, "encoder_q-layer.7": 905.3816, "encoder_q-layer.8": 791.8577, "encoder_q-layer.9": 617.5054, "epoch": 0.94, "inbatch_neg_score": 0.6114, "inbatch_pos_score": 1.3105, "learning_rate": 2.055555555555556e-06, "loss": 3.0419, "norm_diff": 0.0924, "norm_loss": 0.0, "num_token_doc": 67.0125, "num_token_overlap": 15.8567, "num_token_query": 42.3039, "num_token_union": 68.6164, "num_word_context": 202.5757, "num_word_doc": 50.0175, "num_word_query": 31.9415, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1447.7436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 1.4729, "queue_k_norm": 1.5646, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3039, "sent_len_1": 67.0125, "sent_len_max_0": 127.995, "sent_len_max_1": 191.1687, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0591, "doc_norm": 1.5645, "encoder_q-embeddings": 494.0895, "encoder_q-layer.0": 327.367, "encoder_q-layer.1": 363.0035, "encoder_q-layer.10": 616.903, "encoder_q-layer.11": 1549.616, "encoder_q-layer.2": 430.9136, "encoder_q-layer.3": 449.3679, "encoder_q-layer.4": 489.8318, "encoder_q-layer.5": 510.3935, "encoder_q-layer.6": 555.5247, "encoder_q-layer.7": 612.3477, "encoder_q-layer.8": 693.5173, "encoder_q-layer.9": 605.4363, "epoch": 0.94, "inbatch_neg_score": 0.6077, "inbatch_pos_score": 1.2988, "learning_rate": 2.0000000000000003e-06, "loss": 3.0591, "norm_diff": 0.1098, "norm_loss": 0.0, "num_token_doc": 66.7268, "num_token_overlap": 15.8308, "num_token_query": 42.3341, "num_token_union": 68.4587, "num_word_context": 202.3093, "num_word_doc": 49.7882, "num_word_query": 31.9823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1020.8695, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6074, "query_norm": 1.4547, "queue_k_norm": 1.5629, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3341, "sent_len_1": 66.7268, "sent_len_max_0": 127.995, "sent_len_max_1": 190.5188, "stdk": 0.0489, "stdq": 0.0445, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0277, "doc_norm": 1.5644, "encoder_q-embeddings": 1677.8286, "encoder_q-layer.0": 1127.8783, "encoder_q-layer.1": 1305.5594, "encoder_q-layer.10": 599.3882, "encoder_q-layer.11": 1554.6425, "encoder_q-layer.2": 1304.632, "encoder_q-layer.3": 1348.7216, "encoder_q-layer.4": 1368.3051, "encoder_q-layer.5": 1143.2007, "encoder_q-layer.6": 980.494, "encoder_q-layer.7": 925.3195, "encoder_q-layer.8": 837.006, "encoder_q-layer.9": 627.1541, "epoch": 0.94, "inbatch_neg_score": 0.6098, "inbatch_pos_score": 1.2812, "learning_rate": 1.9444444444444444e-06, "loss": 3.0277, "norm_diff": 0.1046, "norm_loss": 0.0, "num_token_doc": 66.9869, "num_token_overlap": 15.8497, "num_token_query": 42.1613, "num_token_union": 68.4803, "num_word_context": 202.3131, "num_word_doc": 49.9789, "num_word_query": 31.8333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1855.0366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6089, "query_norm": 1.4598, "queue_k_norm": 1.5632, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1613, "sent_len_1": 66.9869, "sent_len_max_0": 128.0, "sent_len_max_1": 190.7475, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 55.8594, "active_queue_size": 16384.0, "cl_loss": 3.0349, "doc_norm": 1.5638, "encoder_q-embeddings": 623.4481, "encoder_q-layer.0": 425.5641, "encoder_q-layer.1": 477.0323, "encoder_q-layer.10": 669.0168, "encoder_q-layer.11": 1489.2307, "encoder_q-layer.2": 547.3134, "encoder_q-layer.3": 542.6232, "encoder_q-layer.4": 589.8056, "encoder_q-layer.5": 598.1133, "encoder_q-layer.6": 603.5251, "encoder_q-layer.7": 629.2672, "encoder_q-layer.8": 701.6121, "encoder_q-layer.9": 639.6318, "epoch": 0.94, "inbatch_neg_score": 0.6108, "inbatch_pos_score": 1.293, "learning_rate": 1.888888888888889e-06, "loss": 3.0349, "norm_diff": 0.0973, "norm_loss": 0.0, "num_token_doc": 66.753, "num_token_overlap": 15.8267, "num_token_query": 42.4024, "num_token_union": 68.4813, "num_word_context": 202.3997, "num_word_doc": 49.8095, "num_word_query": 32.0156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1079.5977, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.4665, "queue_k_norm": 1.565, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.4024, "sent_len_1": 66.753, "sent_len_max_0": 128.0, "sent_len_max_1": 190.98, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 57.4219, "active_queue_size": 16384.0, "cl_loss": 3.0471, "doc_norm": 1.5611, "encoder_q-embeddings": 1154.5908, "encoder_q-layer.0": 752.4694, "encoder_q-layer.1": 831.8817, "encoder_q-layer.10": 1225.6459, "encoder_q-layer.11": 3138.9031, "encoder_q-layer.2": 986.8776, "encoder_q-layer.3": 1050.2251, "encoder_q-layer.4": 1163.1046, "encoder_q-layer.5": 1325.411, "encoder_q-layer.6": 1314.7198, "encoder_q-layer.7": 1379.5098, "encoder_q-layer.8": 1405.5051, "encoder_q-layer.9": 1164.4697, "epoch": 0.94, "inbatch_neg_score": 0.6094, "inbatch_pos_score": 1.3018, "learning_rate": 1.8333333333333335e-06, "loss": 3.0471, "norm_diff": 0.1037, "norm_loss": 0.0, "num_token_doc": 66.8399, "num_token_overlap": 15.8622, "num_token_query": 42.365, "num_token_union": 68.4951, "num_word_context": 202.4025, "num_word_doc": 49.9022, "num_word_query": 32.0197, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2155.88, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6099, "query_norm": 1.4575, "queue_k_norm": 1.5632, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.365, "sent_len_1": 66.8399, "sent_len_max_0": 128.0, "sent_len_max_1": 191.2237, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 53.8086, "active_queue_size": 16384.0, "cl_loss": 3.0259, "doc_norm": 1.5631, "encoder_q-embeddings": 1960.5149, "encoder_q-layer.0": 1487.2418, "encoder_q-layer.1": 1588.7954, "encoder_q-layer.10": 1344.9377, "encoder_q-layer.11": 3078.9148, "encoder_q-layer.2": 1871.3116, "encoder_q-layer.3": 1849.4666, "encoder_q-layer.4": 1856.5469, "encoder_q-layer.5": 1711.269, "encoder_q-layer.6": 1703.7078, "encoder_q-layer.7": 1574.1755, "encoder_q-layer.8": 1469.8286, "encoder_q-layer.9": 1271.6262, "epoch": 0.95, "inbatch_neg_score": 0.6067, "inbatch_pos_score": 1.2871, "learning_rate": 1.777777777777778e-06, "loss": 3.0259, "norm_diff": 0.0931, "norm_loss": 0.0, "num_token_doc": 66.795, "num_token_overlap": 15.8322, "num_token_query": 42.375, "num_token_union": 68.5572, "num_word_context": 202.0967, "num_word_doc": 49.7982, "num_word_query": 31.9962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2761.7355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6064, "query_norm": 1.47, "queue_k_norm": 1.5645, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.375, "sent_len_1": 66.795, "sent_len_max_0": 127.9887, "sent_len_max_1": 190.365, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 54.5898, "active_queue_size": 16384.0, "cl_loss": 3.0411, "doc_norm": 1.5737, "encoder_q-embeddings": 1238.0752, "encoder_q-layer.0": 804.3006, "encoder_q-layer.1": 869.1346, "encoder_q-layer.10": 1226.7814, "encoder_q-layer.11": 3137.5945, "encoder_q-layer.2": 1003.4686, "encoder_q-layer.3": 1021.1537, "encoder_q-layer.4": 1021.6289, "encoder_q-layer.5": 1009.7236, "encoder_q-layer.6": 1098.3569, "encoder_q-layer.7": 1214.6531, "encoder_q-layer.8": 1392.4021, "encoder_q-layer.9": 1202.099, "epoch": 0.95, "inbatch_neg_score": 0.6071, "inbatch_pos_score": 1.2871, "learning_rate": 1.7222222222222222e-06, "loss": 3.0411, "norm_diff": 0.1105, "norm_loss": 0.0, "num_token_doc": 66.6287, "num_token_overlap": 15.8306, "num_token_query": 42.4915, "num_token_union": 68.4648, "num_word_context": 202.4702, "num_word_doc": 49.7417, "num_word_query": 32.1172, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2146.7628, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6084, "query_norm": 1.4632, "queue_k_norm": 1.5619, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4915, "sent_len_1": 66.6287, "sent_len_max_0": 127.99, "sent_len_max_1": 187.2475, "stdk": 0.0493, "stdq": 0.0448, "stdqueue_k": 0.0488, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.0328, "doc_norm": 1.5698, "encoder_q-embeddings": 1077.6519, "encoder_q-layer.0": 725.3041, "encoder_q-layer.1": 787.5353, "encoder_q-layer.10": 1260.9283, "encoder_q-layer.11": 3121.6306, "encoder_q-layer.2": 904.7853, "encoder_q-layer.3": 929.6924, "encoder_q-layer.4": 984.6797, "encoder_q-layer.5": 1068.4445, "encoder_q-layer.6": 1185.528, "encoder_q-layer.7": 1408.4634, "encoder_q-layer.8": 1455.2289, "encoder_q-layer.9": 1236.3439, "epoch": 0.95, "inbatch_neg_score": 0.6099, "inbatch_pos_score": 1.2969, "learning_rate": 1.6666666666666667e-06, "loss": 3.0328, "norm_diff": 0.106, "norm_loss": 0.0, "num_token_doc": 66.6912, "num_token_overlap": 15.8247, "num_token_query": 42.2545, "num_token_union": 68.3422, "num_word_context": 202.0206, "num_word_doc": 49.7373, "num_word_query": 31.9294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2112.8159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 1.4638, "queue_k_norm": 1.5643, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2545, "sent_len_1": 66.6912, "sent_len_max_0": 128.0, "sent_len_max_1": 188.6838, "stdk": 0.049, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0381, "doc_norm": 1.5678, "encoder_q-embeddings": 1495.7035, "encoder_q-layer.0": 1060.597, "encoder_q-layer.1": 1028.8873, "encoder_q-layer.10": 1245.0955, "encoder_q-layer.11": 3035.5972, "encoder_q-layer.2": 1105.0544, "encoder_q-layer.3": 1232.464, "encoder_q-layer.4": 1345.8304, "encoder_q-layer.5": 1315.2446, "encoder_q-layer.6": 1310.197, "encoder_q-layer.7": 1349.373, "encoder_q-layer.8": 1472.8691, "encoder_q-layer.9": 1237.358, "epoch": 0.95, "inbatch_neg_score": 0.6111, "inbatch_pos_score": 1.2969, "learning_rate": 1.6111111111111111e-06, "loss": 3.0381, "norm_diff": 0.1082, "norm_loss": 0.0, "num_token_doc": 66.7925, "num_token_overlap": 15.7575, "num_token_query": 42.182, "num_token_union": 68.4444, "num_word_context": 202.2752, "num_word_doc": 49.8256, "num_word_query": 31.8482, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2339.9789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 1.4596, "queue_k_norm": 1.5649, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.182, "sent_len_1": 66.7925, "sent_len_max_0": 127.99, "sent_len_max_1": 189.6813, "stdk": 0.0489, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0347, "doc_norm": 1.5654, "encoder_q-embeddings": 1345.2109, "encoder_q-layer.0": 964.5959, "encoder_q-layer.1": 1152.8311, "encoder_q-layer.10": 1210.1672, "encoder_q-layer.11": 2866.249, "encoder_q-layer.2": 1387.5494, "encoder_q-layer.3": 1436.66, "encoder_q-layer.4": 1457.2155, "encoder_q-layer.5": 1330.7028, "encoder_q-layer.6": 1296.9391, "encoder_q-layer.7": 1264.5914, "encoder_q-layer.8": 1331.9108, "encoder_q-layer.9": 1158.7782, "epoch": 0.95, "inbatch_neg_score": 0.6094, "inbatch_pos_score": 1.3145, "learning_rate": 1.5555555555555556e-06, "loss": 3.0347, "norm_diff": 0.0901, "norm_loss": 0.0, "num_token_doc": 66.8777, "num_token_overlap": 15.8023, "num_token_query": 42.1924, "num_token_union": 68.4674, "num_word_context": 202.5007, "num_word_doc": 49.8867, "num_word_query": 31.8478, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2237.9629, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6099, "query_norm": 1.4753, "queue_k_norm": 1.5646, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.1924, "sent_len_1": 66.8777, "sent_len_max_0": 127.9963, "sent_len_max_1": 189.2775, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 54.1992, "active_queue_size": 16384.0, "cl_loss": 3.0357, "doc_norm": 1.5577, "encoder_q-embeddings": 937.9818, "encoder_q-layer.0": 633.086, "encoder_q-layer.1": 672.8236, "encoder_q-layer.10": 1232.3208, "encoder_q-layer.11": 3029.5605, "encoder_q-layer.2": 744.3038, "encoder_q-layer.3": 771.3895, "encoder_q-layer.4": 821.2166, "encoder_q-layer.5": 877.1575, "encoder_q-layer.6": 977.3529, "encoder_q-layer.7": 1101.136, "encoder_q-layer.8": 1315.5375, "encoder_q-layer.9": 1165.1459, "epoch": 0.95, "inbatch_neg_score": 0.6098, "inbatch_pos_score": 1.2822, "learning_rate": 1.5e-06, "loss": 3.0357, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 66.9914, "num_token_overlap": 15.8772, "num_token_query": 42.4235, "num_token_union": 68.6684, "num_word_context": 202.5137, "num_word_doc": 50.0271, "num_word_query": 32.0395, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1950.9332, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6099, "query_norm": 1.4581, "queue_k_norm": 1.5651, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.4235, "sent_len_1": 66.9914, "sent_len_max_0": 128.0, "sent_len_max_1": 187.6287, "stdk": 0.0485, "stdq": 0.0446, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.0394, "doc_norm": 1.5635, "encoder_q-embeddings": 1120.9862, "encoder_q-layer.0": 724.5209, "encoder_q-layer.1": 798.9243, "encoder_q-layer.10": 1268.0322, "encoder_q-layer.11": 3090.7275, "encoder_q-layer.2": 967.3028, "encoder_q-layer.3": 1037.5769, "encoder_q-layer.4": 1113.3601, "encoder_q-layer.5": 1232.965, "encoder_q-layer.6": 1441.1469, "encoder_q-layer.7": 1476.1602, "encoder_q-layer.8": 1628.4568, "encoder_q-layer.9": 1311.041, "epoch": 0.95, "inbatch_neg_score": 0.6106, "inbatch_pos_score": 1.3115, "learning_rate": 1.4444444444444445e-06, "loss": 3.0394, "norm_diff": 0.0929, "norm_loss": 0.0, "num_token_doc": 66.6958, "num_token_overlap": 15.8099, "num_token_query": 42.1532, "num_token_union": 68.3774, "num_word_context": 202.3066, "num_word_doc": 49.7407, "num_word_query": 31.8184, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2213.3041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.4707, "queue_k_norm": 1.5647, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1532, "sent_len_1": 66.6958, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0362, "stdk": 0.0488, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 57.7148, "active_queue_size": 16384.0, "cl_loss": 3.0442, "doc_norm": 1.5678, "encoder_q-embeddings": 1170.9476, "encoder_q-layer.0": 776.9126, "encoder_q-layer.1": 841.6225, "encoder_q-layer.10": 1234.8572, "encoder_q-layer.11": 2941.1113, "encoder_q-layer.2": 928.5258, "encoder_q-layer.3": 944.3988, "encoder_q-layer.4": 983.4075, "encoder_q-layer.5": 995.5258, "encoder_q-layer.6": 1097.9524, "encoder_q-layer.7": 1174.7703, "encoder_q-layer.8": 1433.8931, "encoder_q-layer.9": 1248.3914, "epoch": 0.95, "inbatch_neg_score": 0.6105, "inbatch_pos_score": 1.293, "learning_rate": 1.388888888888889e-06, "loss": 3.0442, "norm_diff": 0.0983, "norm_loss": 0.0, "num_token_doc": 66.7768, "num_token_overlap": 15.749, "num_token_query": 42.1874, "num_token_union": 68.4706, "num_word_context": 202.4579, "num_word_doc": 49.8196, "num_word_query": 31.8377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2062.0531, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6104, "query_norm": 1.4695, "queue_k_norm": 1.5639, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1874, "sent_len_1": 66.7768, "sent_len_max_0": 127.9788, "sent_len_max_1": 189.92, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 54.7852, "active_queue_size": 16384.0, "cl_loss": 3.0204, "doc_norm": 1.5625, "encoder_q-embeddings": 1801.8115, "encoder_q-layer.0": 1422.0978, "encoder_q-layer.1": 1694.0837, "encoder_q-layer.10": 1279.8928, "encoder_q-layer.11": 3006.4692, "encoder_q-layer.2": 2013.7299, "encoder_q-layer.3": 1726.0485, "encoder_q-layer.4": 1581.5232, "encoder_q-layer.5": 1433.2705, "encoder_q-layer.6": 1467.3264, "encoder_q-layer.7": 1441.239, "encoder_q-layer.8": 1377.5193, "encoder_q-layer.9": 1179.6587, "epoch": 0.95, "inbatch_neg_score": 0.6111, "inbatch_pos_score": 1.2773, "learning_rate": 1.3333333333333334e-06, "loss": 3.0204, "norm_diff": 0.1036, "norm_loss": 0.0, "num_token_doc": 66.6037, "num_token_overlap": 15.8513, "num_token_query": 42.5504, "num_token_union": 68.5166, "num_word_context": 202.1225, "num_word_doc": 49.6557, "num_word_query": 32.1773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2671.3463, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6104, "query_norm": 1.4589, "queue_k_norm": 1.5634, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.5504, "sent_len_1": 66.6037, "sent_len_max_0": 128.0, "sent_len_max_1": 189.3363, "stdk": 0.0487, "stdq": 0.0446, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 54.8828, "active_queue_size": 16384.0, "cl_loss": 3.0375, "doc_norm": 1.5615, "encoder_q-embeddings": 1231.2681, "encoder_q-layer.0": 849.8165, "encoder_q-layer.1": 915.9559, "encoder_q-layer.10": 1258.1205, "encoder_q-layer.11": 3094.052, "encoder_q-layer.2": 998.3568, "encoder_q-layer.3": 1051.3186, "encoder_q-layer.4": 1093.0035, "encoder_q-layer.5": 1065.4873, "encoder_q-layer.6": 1135.6074, "encoder_q-layer.7": 1298.3112, "encoder_q-layer.8": 1425.854, "encoder_q-layer.9": 1220.4138, "epoch": 0.95, "inbatch_neg_score": 0.6094, "inbatch_pos_score": 1.291, "learning_rate": 1.2777777777777779e-06, "loss": 3.0375, "norm_diff": 0.0967, "norm_loss": 0.0, "num_token_doc": 67.0036, "num_token_overlap": 15.8173, "num_token_query": 42.2362, "num_token_union": 68.5673, "num_word_context": 202.5487, "num_word_doc": 49.9475, "num_word_query": 31.89, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2162.7261, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6099, "query_norm": 1.4648, "queue_k_norm": 1.5648, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2362, "sent_len_1": 67.0036, "sent_len_max_0": 128.0, "sent_len_max_1": 189.9875, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 56.25, "active_queue_size": 16384.0, "cl_loss": 3.0253, "doc_norm": 1.5647, "encoder_q-embeddings": 957.0903, "encoder_q-layer.0": 641.2543, "encoder_q-layer.1": 654.4086, "encoder_q-layer.10": 1306.2737, "encoder_q-layer.11": 2926.2031, "encoder_q-layer.2": 720.8728, "encoder_q-layer.3": 760.8348, "encoder_q-layer.4": 782.4868, "encoder_q-layer.5": 822.9744, "encoder_q-layer.6": 952.4088, "encoder_q-layer.7": 1100.1071, "encoder_q-layer.8": 1259.3093, "encoder_q-layer.9": 1155.36, "epoch": 0.95, "inbatch_neg_score": 0.6079, "inbatch_pos_score": 1.2988, "learning_rate": 1.2222222222222223e-06, "loss": 3.0253, "norm_diff": 0.1005, "norm_loss": 0.0, "num_token_doc": 66.7332, "num_token_overlap": 15.8901, "num_token_query": 42.5518, "num_token_union": 68.5382, "num_word_context": 202.576, "num_word_doc": 49.8046, "num_word_query": 32.1795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1903.7334, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6084, "query_norm": 1.4641, "queue_k_norm": 1.5637, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.5518, "sent_len_1": 66.7332, "sent_len_max_0": 128.0, "sent_len_max_1": 189.4487, "stdk": 0.0489, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 55.2734, "active_queue_size": 16384.0, "cl_loss": 3.0245, "doc_norm": 1.5642, "encoder_q-embeddings": 1571.3943, "encoder_q-layer.0": 1106.2737, "encoder_q-layer.1": 1276.7433, "encoder_q-layer.10": 1144.8342, "encoder_q-layer.11": 2894.1023, "encoder_q-layer.2": 1462.8348, "encoder_q-layer.3": 1511.0132, "encoder_q-layer.4": 1567.0072, "encoder_q-layer.5": 1847.053, "encoder_q-layer.6": 1834.2544, "encoder_q-layer.7": 1602.0317, "encoder_q-layer.8": 1389.3427, "encoder_q-layer.9": 1154.5493, "epoch": 0.96, "inbatch_neg_score": 0.6117, "inbatch_pos_score": 1.2891, "learning_rate": 1.1666666666666668e-06, "loss": 3.0245, "norm_diff": 0.1006, "norm_loss": 0.0, "num_token_doc": 66.7094, "num_token_overlap": 15.8356, "num_token_query": 42.6246, "num_token_union": 68.6399, "num_word_context": 202.5333, "num_word_doc": 49.8065, "num_word_query": 32.2389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2469.0479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6108, "query_norm": 1.4636, "queue_k_norm": 1.564, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.6246, "sent_len_1": 66.7094, "sent_len_max_0": 127.9912, "sent_len_max_1": 190.4387, "stdk": 0.0488, "stdq": 0.0449, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.0322, "doc_norm": 1.5629, "encoder_q-embeddings": 1600.1097, "encoder_q-layer.0": 1160.193, "encoder_q-layer.1": 1233.5892, "encoder_q-layer.10": 1173.0288, "encoder_q-layer.11": 2964.804, "encoder_q-layer.2": 1508.7384, "encoder_q-layer.3": 1596.4786, "encoder_q-layer.4": 1705.4773, "encoder_q-layer.5": 1703.5553, "encoder_q-layer.6": 1782.3656, "encoder_q-layer.7": 1580.4274, "encoder_q-layer.8": 1410.4075, "encoder_q-layer.9": 1144.0546, "epoch": 0.96, "inbatch_neg_score": 0.6085, "inbatch_pos_score": 1.2998, "learning_rate": 1.1111111111111112e-06, "loss": 3.0322, "norm_diff": 0.0915, "norm_loss": 0.0, "num_token_doc": 66.8925, "num_token_overlap": 15.8814, "num_token_query": 42.4155, "num_token_union": 68.5717, "num_word_context": 202.433, "num_word_doc": 49.9163, "num_word_query": 32.0689, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2531.12, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 1.4714, "queue_k_norm": 1.5634, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.4155, "sent_len_1": 66.8925, "sent_len_max_0": 127.9988, "sent_len_max_1": 188.6675, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.035, "doc_norm": 1.5666, "encoder_q-embeddings": 5951.6274, "encoder_q-layer.0": 4042.5139, "encoder_q-layer.1": 4329.2217, "encoder_q-layer.10": 1375.141, "encoder_q-layer.11": 3001.6104, "encoder_q-layer.2": 5415.5132, "encoder_q-layer.3": 6147.5347, "encoder_q-layer.4": 7002.5996, "encoder_q-layer.5": 7319.856, "encoder_q-layer.6": 6897.8647, "encoder_q-layer.7": 7127.3174, "encoder_q-layer.8": 6200.7578, "encoder_q-layer.9": 2953.9829, "epoch": 0.96, "inbatch_neg_score": 0.6119, "inbatch_pos_score": 1.2988, "learning_rate": 1.0555555555555557e-06, "loss": 3.035, "norm_diff": 0.1038, "norm_loss": 0.0, "num_token_doc": 66.6411, "num_token_overlap": 15.7633, "num_token_query": 42.1895, "num_token_union": 68.3469, "num_word_context": 201.9823, "num_word_doc": 49.6763, "num_word_query": 31.858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8385.9172, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6113, "query_norm": 1.4628, "queue_k_norm": 1.5637, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.1895, "sent_len_1": 66.6411, "sent_len_max_0": 128.0, "sent_len_max_1": 190.8562, "stdk": 0.0489, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 56.7383, "active_queue_size": 16384.0, "cl_loss": 3.0337, "doc_norm": 1.5622, "encoder_q-embeddings": 1083.1394, "encoder_q-layer.0": 717.5145, "encoder_q-layer.1": 775.1152, "encoder_q-layer.10": 1328.8572, "encoder_q-layer.11": 3112.1626, "encoder_q-layer.2": 935.5969, "encoder_q-layer.3": 1001.9855, "encoder_q-layer.4": 1082.4055, "encoder_q-layer.5": 1127.6548, "encoder_q-layer.6": 1250.3706, "encoder_q-layer.7": 1367.2753, "encoder_q-layer.8": 1549.5488, "encoder_q-layer.9": 1348.5825, "epoch": 0.96, "inbatch_neg_score": 0.6114, "inbatch_pos_score": 1.2979, "learning_rate": 1.0000000000000002e-06, "loss": 3.0337, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 66.7568, "num_token_overlap": 15.8354, "num_token_query": 42.3479, "num_token_union": 68.4855, "num_word_context": 202.417, "num_word_doc": 49.8184, "num_word_query": 31.9616, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2187.9675, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.4669, "queue_k_norm": 1.5662, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3479, "sent_len_1": 66.7568, "sent_len_max_0": 127.9925, "sent_len_max_1": 189.09, "stdk": 0.0487, "stdq": 0.0449, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 56.4453, "active_queue_size": 16384.0, "cl_loss": 3.0311, "doc_norm": 1.568, "encoder_q-embeddings": 1889.324, "encoder_q-layer.0": 1321.1061, "encoder_q-layer.1": 1487.0646, "encoder_q-layer.10": 1295.593, "encoder_q-layer.11": 3021.312, "encoder_q-layer.2": 1646.3027, "encoder_q-layer.3": 1771.7341, "encoder_q-layer.4": 2072.4114, "encoder_q-layer.5": 2152.0969, "encoder_q-layer.6": 2631.2449, "encoder_q-layer.7": 2616.3364, "encoder_q-layer.8": 1772.0323, "encoder_q-layer.9": 1242.3572, "epoch": 0.96, "inbatch_neg_score": 0.6114, "inbatch_pos_score": 1.3057, "learning_rate": 9.444444444444445e-07, "loss": 3.0311, "norm_diff": 0.0982, "norm_loss": 0.0, "num_token_doc": 66.7559, "num_token_overlap": 15.8404, "num_token_query": 42.4304, "num_token_union": 68.5213, "num_word_context": 202.2555, "num_word_doc": 49.8389, "num_word_query": 32.0196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3010.1622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6108, "query_norm": 1.4699, "queue_k_norm": 1.5641, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4304, "sent_len_1": 66.7559, "sent_len_max_0": 128.0, "sent_len_max_1": 187.0488, "stdk": 0.049, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 56.6406, "active_queue_size": 16384.0, "cl_loss": 3.0324, "doc_norm": 1.5652, "encoder_q-embeddings": 1004.7598, "encoder_q-layer.0": 665.4541, "encoder_q-layer.1": 719.1722, "encoder_q-layer.10": 1231.2607, "encoder_q-layer.11": 2968.7339, "encoder_q-layer.2": 815.0314, "encoder_q-layer.3": 884.2659, "encoder_q-layer.4": 952.4039, "encoder_q-layer.5": 1024.4191, "encoder_q-layer.6": 1133.2878, "encoder_q-layer.7": 1264.1049, "encoder_q-layer.8": 1438.6047, "encoder_q-layer.9": 1269.1271, "epoch": 0.96, "inbatch_neg_score": 0.612, "inbatch_pos_score": 1.3096, "learning_rate": 8.88888888888889e-07, "loss": 3.0324, "norm_diff": 0.1016, "norm_loss": 0.0, "num_token_doc": 66.9339, "num_token_overlap": 15.8057, "num_token_query": 42.2365, "num_token_union": 68.5327, "num_word_context": 202.3546, "num_word_doc": 49.9075, "num_word_query": 31.8541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2022.8056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6118, "query_norm": 1.4635, "queue_k_norm": 1.5654, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2365, "sent_len_1": 66.9339, "sent_len_max_0": 127.9925, "sent_len_max_1": 191.9525, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.0279, "doc_norm": 1.5661, "encoder_q-embeddings": 997.4372, "encoder_q-layer.0": 664.3766, "encoder_q-layer.1": 697.3745, "encoder_q-layer.10": 1351.4819, "encoder_q-layer.11": 3035.9028, "encoder_q-layer.2": 809.746, "encoder_q-layer.3": 864.4523, "encoder_q-layer.4": 970.2755, "encoder_q-layer.5": 1056.0444, "encoder_q-layer.6": 1244.4489, "encoder_q-layer.7": 1422.0919, "encoder_q-layer.8": 1316.553, "encoder_q-layer.9": 1191.3752, "epoch": 0.96, "inbatch_neg_score": 0.6105, "inbatch_pos_score": 1.3105, "learning_rate": 8.333333333333333e-07, "loss": 3.0279, "norm_diff": 0.0954, "norm_loss": 0.0, "num_token_doc": 66.9053, "num_token_overlap": 15.8504, "num_token_query": 42.2943, "num_token_union": 68.5171, "num_word_context": 202.4429, "num_word_doc": 49.9031, "num_word_query": 31.9442, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2058.7802, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.4706, "queue_k_norm": 1.5649, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2943, "sent_len_1": 66.9053, "sent_len_max_0": 127.995, "sent_len_max_1": 189.8475, "stdk": 0.0489, "stdq": 0.0451, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 54.6875, "active_queue_size": 16384.0, "cl_loss": 3.0496, "doc_norm": 1.5638, "encoder_q-embeddings": 1348.7755, "encoder_q-layer.0": 881.4324, "encoder_q-layer.1": 1010.3062, "encoder_q-layer.10": 1258.8934, "encoder_q-layer.11": 3148.9155, "encoder_q-layer.2": 1125.3806, "encoder_q-layer.3": 1182.9196, "encoder_q-layer.4": 1262.5416, "encoder_q-layer.5": 1366.2634, "encoder_q-layer.6": 1494.0764, "encoder_q-layer.7": 1411.9353, "encoder_q-layer.8": 1444.813, "encoder_q-layer.9": 1256.5872, "epoch": 0.96, "inbatch_neg_score": 0.6113, "inbatch_pos_score": 1.2871, "learning_rate": 7.777777777777778e-07, "loss": 3.0496, "norm_diff": 0.1029, "norm_loss": 0.0, "num_token_doc": 66.8051, "num_token_overlap": 15.8005, "num_token_query": 42.1809, "num_token_union": 68.486, "num_word_context": 202.3171, "num_word_doc": 49.8587, "num_word_query": 31.8564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2283.0252, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6118, "query_norm": 1.4609, "queue_k_norm": 1.5658, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1809, "sent_len_1": 66.8051, "sent_len_max_0": 127.9975, "sent_len_max_1": 188.7512, "stdk": 0.0488, "stdq": 0.0447, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 56.1523, "active_queue_size": 16384.0, "cl_loss": 3.0264, "doc_norm": 1.5625, "encoder_q-embeddings": 1936.7236, "encoder_q-layer.0": 1257.4832, "encoder_q-layer.1": 1362.9357, "encoder_q-layer.10": 2433.4602, "encoder_q-layer.11": 5858.876, "encoder_q-layer.2": 1555.6351, "encoder_q-layer.3": 1661.4563, "encoder_q-layer.4": 1755.9086, "encoder_q-layer.5": 1774.538, "encoder_q-layer.6": 2053.866, "encoder_q-layer.7": 2267.3994, "encoder_q-layer.8": 2679.4114, "encoder_q-layer.9": 2360.5144, "epoch": 0.96, "inbatch_neg_score": 0.6122, "inbatch_pos_score": 1.29, "learning_rate": 7.222222222222222e-07, "loss": 3.0264, "norm_diff": 0.0932, "norm_loss": 0.0, "num_token_doc": 66.7298, "num_token_overlap": 15.8771, "num_token_query": 42.4918, "num_token_union": 68.5135, "num_word_context": 202.3264, "num_word_doc": 49.7968, "num_word_query": 32.1117, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3854.9615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6118, "query_norm": 1.4693, "queue_k_norm": 1.5636, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.4918, "sent_len_1": 66.7298, "sent_len_max_0": 128.0, "sent_len_max_1": 190.0412, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0277, "doc_norm": 1.5645, "encoder_q-embeddings": 2694.4875, "encoder_q-layer.0": 1848.3533, "encoder_q-layer.1": 2000.131, "encoder_q-layer.10": 2698.677, "encoder_q-layer.11": 6197.8687, "encoder_q-layer.2": 2367.0159, "encoder_q-layer.3": 2539.1348, "encoder_q-layer.4": 2900.1929, "encoder_q-layer.5": 2978.5654, "encoder_q-layer.6": 3063.1062, "encoder_q-layer.7": 3379.4485, "encoder_q-layer.8": 3603.8364, "encoder_q-layer.9": 2795.6697, "epoch": 0.96, "inbatch_neg_score": 0.6124, "inbatch_pos_score": 1.2773, "learning_rate": 6.666666666666667e-07, "loss": 3.0277, "norm_diff": 0.0961, "norm_loss": 0.0, "num_token_doc": 66.9328, "num_token_overlap": 15.8426, "num_token_query": 42.496, "num_token_union": 68.6531, "num_word_context": 202.5469, "num_word_doc": 49.9324, "num_word_query": 32.125, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4854.4316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6118, "query_norm": 1.4684, "queue_k_norm": 1.5663, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.496, "sent_len_1": 66.9328, "sent_len_max_0": 128.0, "sent_len_max_1": 191.0925, "stdk": 0.0488, "stdq": 0.045, "stdqueue_k": 0.049, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 57.5195, "active_queue_size": 16384.0, "cl_loss": 3.0347, "doc_norm": 1.5702, "encoder_q-embeddings": 7080.4307, "encoder_q-layer.0": 4613.3145, "encoder_q-layer.1": 5188.6143, "encoder_q-layer.10": 2649.834, "encoder_q-layer.11": 6085.3945, "encoder_q-layer.2": 6801.0, "encoder_q-layer.3": 6925.9414, "encoder_q-layer.4": 7301.5239, "encoder_q-layer.5": 7022.376, "encoder_q-layer.6": 7194.7822, "encoder_q-layer.7": 5872.7212, "encoder_q-layer.8": 4343.1519, "encoder_q-layer.9": 2998.9429, "epoch": 0.97, "inbatch_neg_score": 0.6099, "inbatch_pos_score": 1.3164, "learning_rate": 6.111111111111112e-07, "loss": 3.0347, "norm_diff": 0.1057, "norm_loss": 0.0, "num_token_doc": 66.9016, "num_token_overlap": 15.8211, "num_token_query": 42.2662, "num_token_union": 68.5711, "num_word_context": 202.5171, "num_word_doc": 49.9846, "num_word_query": 31.9394, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8888.4424, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6113, "query_norm": 1.4645, "queue_k_norm": 1.5642, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.2662, "sent_len_1": 66.9016, "sent_len_max_0": 127.9988, "sent_len_max_1": 187.4062, "stdk": 0.0491, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 55.5664, "active_queue_size": 16384.0, "cl_loss": 3.0232, "doc_norm": 1.5667, "encoder_q-embeddings": 2325.2466, "encoder_q-layer.0": 1621.9224, "encoder_q-layer.1": 1831.0543, "encoder_q-layer.10": 2769.043, "encoder_q-layer.11": 6521.0254, "encoder_q-layer.2": 2151.1655, "encoder_q-layer.3": 2186.7087, "encoder_q-layer.4": 2289.7151, "encoder_q-layer.5": 2701.0505, "encoder_q-layer.6": 2931.1367, "encoder_q-layer.7": 2779.5085, "encoder_q-layer.8": 2973.6216, "encoder_q-layer.9": 2579.5713, "epoch": 0.97, "inbatch_neg_score": 0.6112, "inbatch_pos_score": 1.3018, "learning_rate": 5.555555555555556e-07, "loss": 3.0232, "norm_diff": 0.0931, "norm_loss": 0.0, "num_token_doc": 66.8099, "num_token_overlap": 15.9161, "num_token_query": 42.3489, "num_token_union": 68.4101, "num_word_context": 202.22, "num_word_doc": 49.8265, "num_word_query": 31.9829, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4533.327, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.4736, "queue_k_norm": 1.565, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.3489, "sent_len_1": 66.8099, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.2713, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 59.1797, "active_queue_size": 16384.0, "cl_loss": 3.0278, "doc_norm": 1.5667, "encoder_q-embeddings": 2571.0032, "encoder_q-layer.0": 1719.2583, "encoder_q-layer.1": 2017.1948, "encoder_q-layer.10": 2516.9407, "encoder_q-layer.11": 6017.0747, "encoder_q-layer.2": 2411.373, "encoder_q-layer.3": 2762.1206, "encoder_q-layer.4": 2986.2217, "encoder_q-layer.5": 2986.3767, "encoder_q-layer.6": 2888.978, "encoder_q-layer.7": 2988.1357, "encoder_q-layer.8": 2845.9111, "encoder_q-layer.9": 2358.2258, "epoch": 0.97, "inbatch_neg_score": 0.6116, "inbatch_pos_score": 1.3252, "learning_rate": 5.000000000000001e-07, "loss": 3.0278, "norm_diff": 0.0926, "norm_loss": 0.0, "num_token_doc": 66.8748, "num_token_overlap": 15.8701, "num_token_query": 42.3341, "num_token_union": 68.4951, "num_word_context": 202.141, "num_word_doc": 49.9069, "num_word_query": 31.9855, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4613.5425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.4741, "queue_k_norm": 1.5646, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3341, "sent_len_1": 66.8748, "sent_len_max_0": 128.0, "sent_len_max_1": 188.475, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 57.9102, "active_queue_size": 16384.0, "cl_loss": 3.0437, "doc_norm": 1.5647, "encoder_q-embeddings": 1824.8807, "encoder_q-layer.0": 1222.528, "encoder_q-layer.1": 1285.528, "encoder_q-layer.10": 2493.738, "encoder_q-layer.11": 5939.333, "encoder_q-layer.2": 1427.3549, "encoder_q-layer.3": 1468.8459, "encoder_q-layer.4": 1561.2737, "encoder_q-layer.5": 1571.8428, "encoder_q-layer.6": 1774.1718, "encoder_q-layer.7": 2028.8829, "encoder_q-layer.8": 2404.542, "encoder_q-layer.9": 2327.2412, "epoch": 0.97, "inbatch_neg_score": 0.613, "inbatch_pos_score": 1.2979, "learning_rate": 4.444444444444445e-07, "loss": 3.0437, "norm_diff": 0.092, "norm_loss": 0.0, "num_token_doc": 66.9552, "num_token_overlap": 15.9276, "num_token_query": 42.6034, "num_token_union": 68.6112, "num_word_context": 202.5211, "num_word_doc": 49.9306, "num_word_query": 32.1856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3727.7806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6133, "query_norm": 1.4727, "queue_k_norm": 1.565, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.6034, "sent_len_1": 66.9552, "sent_len_max_0": 128.0, "sent_len_max_1": 190.69, "stdk": 0.0488, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 61.0352, "active_queue_size": 16384.0, "cl_loss": 3.0373, "doc_norm": 1.5691, "encoder_q-embeddings": 2030.8009, "encoder_q-layer.0": 1349.1556, "encoder_q-layer.1": 1508.2968, "encoder_q-layer.10": 2411.6714, "encoder_q-layer.11": 5935.8867, "encoder_q-layer.2": 1701.7336, "encoder_q-layer.3": 1801.0293, "encoder_q-layer.4": 1919.9873, "encoder_q-layer.5": 1993.166, "encoder_q-layer.6": 2088.2563, "encoder_q-layer.7": 2386.6521, "encoder_q-layer.8": 2564.21, "encoder_q-layer.9": 2268.7693, "epoch": 0.97, "inbatch_neg_score": 0.6131, "inbatch_pos_score": 1.335, "learning_rate": 3.888888888888889e-07, "loss": 3.0373, "norm_diff": 0.0957, "norm_loss": 0.0, "num_token_doc": 66.8752, "num_token_overlap": 15.8606, "num_token_query": 42.3125, "num_token_union": 68.5216, "num_word_context": 202.4955, "num_word_doc": 49.9402, "num_word_query": 31.9746, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3929.4866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6133, "query_norm": 1.4734, "queue_k_norm": 1.5646, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3125, "sent_len_1": 66.8752, "sent_len_max_0": 128.0, "sent_len_max_1": 189.7725, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 59.4727, "active_queue_size": 16384.0, "cl_loss": 3.0183, "doc_norm": 1.5656, "encoder_q-embeddings": 4307.332, "encoder_q-layer.0": 2917.9351, "encoder_q-layer.1": 3323.4702, "encoder_q-layer.10": 2810.468, "encoder_q-layer.11": 5855.106, "encoder_q-layer.2": 3758.7927, "encoder_q-layer.3": 4011.4583, "encoder_q-layer.4": 3997.4558, "encoder_q-layer.5": 4037.9624, "encoder_q-layer.6": 3990.7825, "encoder_q-layer.7": 3517.6282, "encoder_q-layer.8": 3657.7305, "encoder_q-layer.9": 2799.623, "epoch": 0.97, "inbatch_neg_score": 0.6112, "inbatch_pos_score": 1.3213, "learning_rate": 3.3333333333333335e-07, "loss": 3.0183, "norm_diff": 0.0894, "norm_loss": 0.0, "num_token_doc": 66.883, "num_token_overlap": 15.8158, "num_token_query": 42.2757, "num_token_union": 68.5297, "num_word_context": 202.1661, "num_word_doc": 49.9077, "num_word_query": 31.9031, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5848.8807, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6118, "query_norm": 1.4762, "queue_k_norm": 1.5639, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.2757, "sent_len_1": 66.883, "sent_len_max_0": 127.9875, "sent_len_max_1": 188.4775, "stdk": 0.0489, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 58.0078, "active_queue_size": 16384.0, "cl_loss": 3.0176, "doc_norm": 1.5684, "encoder_q-embeddings": 1987.7559, "encoder_q-layer.0": 1314.5354, "encoder_q-layer.1": 1441.8903, "encoder_q-layer.10": 2966.9573, "encoder_q-layer.11": 6726.8691, "encoder_q-layer.2": 1641.1063, "encoder_q-layer.3": 1711.7196, "encoder_q-layer.4": 1836.9204, "encoder_q-layer.5": 1927.0231, "encoder_q-layer.6": 2147.0066, "encoder_q-layer.7": 2349.3513, "encoder_q-layer.8": 2974.3821, "encoder_q-layer.9": 2583.5215, "epoch": 0.97, "inbatch_neg_score": 0.6111, "inbatch_pos_score": 1.3223, "learning_rate": 2.777777777777778e-07, "loss": 3.0176, "norm_diff": 0.0967, "norm_loss": 0.0, "num_token_doc": 66.584, "num_token_overlap": 15.8383, "num_token_query": 42.3927, "num_token_union": 68.4137, "num_word_context": 202.0036, "num_word_doc": 49.6848, "num_word_query": 32.0096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4234.0746, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6128, "query_norm": 1.4718, "queue_k_norm": 1.5639, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.3927, "sent_len_1": 66.584, "sent_len_max_0": 127.9912, "sent_len_max_1": 188.1188, "stdk": 0.049, "stdq": 0.0452, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 55.3711, "active_queue_size": 16384.0, "cl_loss": 3.0343, "doc_norm": 1.563, "encoder_q-embeddings": 3438.4644, "encoder_q-layer.0": 2436.6985, "encoder_q-layer.1": 2616.8018, "encoder_q-layer.10": 2545.5693, "encoder_q-layer.11": 6322.749, "encoder_q-layer.2": 3148.1975, "encoder_q-layer.3": 3392.3538, "encoder_q-layer.4": 3483.1638, "encoder_q-layer.5": 3550.0168, "encoder_q-layer.6": 3863.7725, "encoder_q-layer.7": 4020.9778, "encoder_q-layer.8": 3826.9199, "encoder_q-layer.9": 2917.2136, "epoch": 0.97, "inbatch_neg_score": 0.6148, "inbatch_pos_score": 1.2939, "learning_rate": 2.2222222222222224e-07, "loss": 3.0343, "norm_diff": 0.0951, "norm_loss": 0.0, "num_token_doc": 66.7199, "num_token_overlap": 15.8335, "num_token_query": 42.3806, "num_token_union": 68.5039, "num_word_context": 202.1875, "num_word_doc": 49.7708, "num_word_query": 32.0234, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5607.3151, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6133, "query_norm": 1.4679, "queue_k_norm": 1.5643, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.3806, "sent_len_1": 66.7199, "sent_len_max_0": 127.9975, "sent_len_max_1": 189.365, "stdk": 0.0487, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 57.2266, "active_queue_size": 16384.0, "cl_loss": 3.024, "doc_norm": 1.5719, "encoder_q-embeddings": 1231.7278, "encoder_q-layer.0": 826.4295, "encoder_q-layer.1": 950.5107, "encoder_q-layer.10": 1278.6654, "encoder_q-layer.11": 3034.9097, "encoder_q-layer.2": 1114.2305, "encoder_q-layer.3": 1114.1316, "encoder_q-layer.4": 1277.9136, "encoder_q-layer.5": 1294.6268, "encoder_q-layer.6": 1366.8207, "encoder_q-layer.7": 1415.101, "encoder_q-layer.8": 1447.2379, "encoder_q-layer.9": 1221.5568, "epoch": 0.97, "inbatch_neg_score": 0.6151, "inbatch_pos_score": 1.3271, "learning_rate": 1.6666666666666668e-07, "loss": 3.024, "norm_diff": 0.0964, "norm_loss": 0.0, "num_token_doc": 66.7938, "num_token_overlap": 15.8377, "num_token_query": 42.3478, "num_token_union": 68.462, "num_word_context": 202.2297, "num_word_doc": 49.8521, "num_word_query": 32.0036, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2229.4142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6138, "query_norm": 1.4755, "queue_k_norm": 1.5658, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 42.3478, "sent_len_1": 66.7938, "sent_len_max_0": 127.9975, "sent_len_max_1": 190.7625, "stdk": 0.0491, "stdq": 0.0453, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 53.7109, "active_queue_size": 16384.0, "cl_loss": 3.0494, "doc_norm": 1.5608, "encoder_q-embeddings": 1266.5048, "encoder_q-layer.0": 902.5687, "encoder_q-layer.1": 981.9146, "encoder_q-layer.10": 1228.6559, "encoder_q-layer.11": 3018.1484, "encoder_q-layer.2": 1128.4332, "encoder_q-layer.3": 1136.488, "encoder_q-layer.4": 1260.3923, "encoder_q-layer.5": 1310.7689, "encoder_q-layer.6": 1337.7607, "encoder_q-layer.7": 1424.8521, "encoder_q-layer.8": 1408.3302, "encoder_q-layer.9": 1199.2961, "epoch": 0.97, "inbatch_neg_score": 0.6128, "inbatch_pos_score": 1.2832, "learning_rate": 1.1111111111111112e-07, "loss": 3.0494, "norm_diff": 0.0995, "norm_loss": 0.0, "num_token_doc": 66.6284, "num_token_overlap": 15.7187, "num_token_query": 42.1045, "num_token_union": 68.3426, "num_word_context": 202.2879, "num_word_doc": 49.721, "num_word_query": 31.8005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2212.78, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6133, "query_norm": 1.4613, "queue_k_norm": 1.5639, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 42.1045, "sent_len_1": 66.6284, "sent_len_max_0": 127.9938, "sent_len_max_1": 190.0225, "stdk": 0.0486, "stdq": 0.0447, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 57.0312, "active_queue_size": 16384.0, "cl_loss": 3.0482, "doc_norm": 1.5685, "encoder_q-embeddings": 886.7086, "encoder_q-layer.0": 597.5916, "encoder_q-layer.1": 637.2452, "encoder_q-layer.10": 1220.4293, "encoder_q-layer.11": 3029.4895, "encoder_q-layer.2": 720.5461, "encoder_q-layer.3": 754.5135, "encoder_q-layer.4": 802.9823, "encoder_q-layer.5": 825.0421, "encoder_q-layer.6": 880.9144, "encoder_q-layer.7": 1020.4401, "encoder_q-layer.8": 1195.9934, "encoder_q-layer.9": 1149.527, "epoch": 0.98, "inbatch_neg_score": 0.6121, "inbatch_pos_score": 1.3213, "learning_rate": 5.555555555555556e-08, "loss": 3.0482, "norm_diff": 0.1011, "norm_loss": 0.0, "num_token_doc": 66.803, "num_token_overlap": 15.7498, "num_token_query": 42.1131, "num_token_union": 68.4348, "num_word_context": 202.3372, "num_word_doc": 49.862, "num_word_query": 31.7883, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1875.6947, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6133, "query_norm": 1.4675, "queue_k_norm": 1.5651, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 42.1131, "sent_len_1": 66.803, "sent_len_max_0": 128.0, "sent_len_max_1": 189.8587, "stdk": 0.049, "stdq": 0.045, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 55.6641, "active_queue_size": 16384.0, "cl_loss": 3.0312, "doc_norm": 1.5659, "encoder_q-embeddings": 1010.0876, "encoder_q-layer.0": 664.4646, "encoder_q-layer.1": 708.9159, "encoder_q-layer.10": 1300.6945, "encoder_q-layer.11": 3066.1843, "encoder_q-layer.2": 772.8738, "encoder_q-layer.3": 813.8329, "encoder_q-layer.4": 858.3188, "encoder_q-layer.5": 896.0241, "encoder_q-layer.6": 1052.843, "encoder_q-layer.7": 1255.9768, "encoder_q-layer.8": 1457.6456, "encoder_q-layer.9": 1198.9943, "epoch": 0.98, "inbatch_neg_score": 0.6138, "inbatch_pos_score": 1.2988, "learning_rate": 0.0, "loss": 3.0312, "norm_diff": 0.1034, "norm_loss": 0.0, "num_token_doc": 66.6829, "num_token_overlap": 15.8163, "num_token_query": 42.2498, "num_token_union": 68.353, "num_word_context": 202.4196, "num_word_doc": 49.7781, "num_word_query": 31.9065, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2037.6351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6128, "query_norm": 1.4625, "queue_k_norm": 1.5642, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 42.2498, "sent_len_1": 66.6829, "sent_len_max_0": 127.9862, "sent_len_max_1": 188.1037, "stdk": 0.0488, "stdq": 0.0448, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 26.9247, "dev_samples_per_second": 2.377, "dev_steps_per_second": 0.037, "epoch": 0.98, "step": 100000, "test_accuracy": 93.95751953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3448733687400818, "test_doc_norm": 1.5522923469543457, "test_inbatch_neg_score": 0.983262300491333, "test_inbatch_pos_score": 1.9429173469543457, "test_loss": 0.3448733687400818, "test_loss_align": 0.9634833335876465, "test_loss_unif": 3.265587568283081, "test_loss_unif_q@queue": 3.265587329864502, "test_norm_diff": 0.008912239223718643, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6107872724533081, "test_query_norm": 1.5525423288345337, "test_queue_k_norm": 1.5647010803222656, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042964428663253784, "test_stdq": 0.04288874566555023, "test_stdqueue_k": 0.04890419542789459, "test_stdqueue_q": 0.0 }, { "dev_runtime": 26.9247, "dev_samples_per_second": 2.377, "dev_steps_per_second": 0.037, "epoch": 0.98, "eval_beir-arguana_ndcg@10": 0.39579, "eval_beir-arguana_recall@10": 0.66714, "eval_beir-arguana_recall@100": 0.94168, "eval_beir-arguana_recall@20": 0.80725, "eval_beir-avg_ndcg@10": 0.38330125000000004, "eval_beir-avg_recall@10": 0.4546706666666666, "eval_beir-avg_recall@100": 0.6356908333333333, "eval_beir-avg_recall@20": 0.5172629166666667, "eval_beir-cqadupstack_ndcg@10": 0.2689125, "eval_beir-cqadupstack_recall@10": 0.3638466666666666, "eval_beir-cqadupstack_recall@100": 0.6019383333333334, "eval_beir-cqadupstack_recall@20": 0.4333991666666667, "eval_beir-fiqa_ndcg@10": 0.25367, "eval_beir-fiqa_recall@10": 0.31546, "eval_beir-fiqa_recall@100": 0.59399, "eval_beir-fiqa_recall@20": 0.39133, "eval_beir-nfcorpus_ndcg@10": 0.29655, "eval_beir-nfcorpus_recall@10": 0.14464, "eval_beir-nfcorpus_recall@100": 0.28415, "eval_beir-nfcorpus_recall@20": 0.17974, "eval_beir-nq_ndcg@10": 0.2742, "eval_beir-nq_recall@10": 0.44602, "eval_beir-nq_recall@100": 0.79582, "eval_beir-nq_recall@20": 0.57443, "eval_beir-quora_ndcg@10": 0.77792, "eval_beir-quora_recall@10": 0.88573, "eval_beir-quora_recall@100": 0.97682, "eval_beir-quora_recall@20": 0.9275, "eval_beir-scidocs_ndcg@10": 0.15413, "eval_beir-scidocs_recall@10": 0.16083, "eval_beir-scidocs_recall@100": 0.37255, "eval_beir-scidocs_recall@20": 0.22063, "eval_beir-scifact_ndcg@10": 0.65609, "eval_beir-scifact_recall@10": 0.81167, "eval_beir-scifact_recall@100": 0.91156, "eval_beir-scifact_recall@20": 0.84756, "eval_beir-trec-covid_ndcg@10": 0.56947, "eval_beir-trec-covid_recall@10": 0.622, "eval_beir-trec-covid_recall@100": 0.469, "eval_beir-trec-covid_recall@20": 0.587, "eval_beir-webis-touche2020_ndcg@10": 0.18628, "eval_beir-webis-touche2020_recall@10": 0.12937, "eval_beir-webis-touche2020_recall@100": 0.4094, "eval_beir-webis-touche2020_recall@20": 0.20379, "eval_senteval-avg_sts": 0.758330062784011, "eval_senteval-sickr_spearman": 0.7277741174377886, "eval_senteval-stsb_spearman": 0.7888860081302335, "step": 100000, "test_accuracy": 93.95751953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3448733687400818, "test_doc_norm": 1.5522923469543457, "test_inbatch_neg_score": 0.983262300491333, "test_inbatch_pos_score": 1.9429173469543457, "test_loss": 0.3448733687400818, "test_loss_align": 0.9634833335876465, "test_loss_unif": 3.265587568283081, "test_loss_unif_q@queue": 3.265587329864502, "test_norm_diff": 0.008912239223718643, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6107872724533081, "test_query_norm": 1.5525423288345337, "test_queue_k_norm": 1.5647010803222656, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.042964428663253784, "test_stdq": 0.04288874566555023, "test_stdqueue_k": 0.04890419542789459, "test_stdqueue_q": 0.0 }, { "epoch": 0.98, "step": 100000, "total_flos": 0, "train_runtime": 72359.2785, "train_samples_per_second": 1.382 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }